From 0bb7d147575425afab32a478d5ef0451810ca448 Mon Sep 17 00:00:00 2001 From: clemfeb Date: Tue, 27 Oct 2020 19:01:24 +0200 Subject: [PATCH] Demo for GitHubUse trying out mergin code --- BootcampStats.ipynb | 270 ++++---------------------------------------- PredictSalary.ipynb | 20 +++- 2 files changed, 37 insertions(+), 253 deletions(-) diff --git a/BootcampStats.ipynb b/BootcampStats.ipynb index 5d80edc..1a5565b 100644 --- a/BootcampStats.ipynb +++ b/BootcampStats.ipynb @@ -11,256 +11,26 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RespondentProfessionalProgramHobbyCountryUniversityEmploymentStatusFormalEducationMajorUndergradHomeRemoteCompanySize...StackOverflowMakeMoneyGenderHighestEducationParentsRaceSurveyLongQuestionsInterestingQuestionsConfusingInterestedAnswersSalaryExpectedSalary
01StudentYes, bothUnited StatesNoNot employed, and not looking for workSecondary schoolNaNNaNNaN...Strongly disagreeMaleHigh schoolWhite or of European descentStrongly disagreeStrongly agreeDisagreeStrongly agreeNaNNaN
12StudentYes, bothUnited KingdomYes, full-timeEmployed part-timeSome college/university study without earning ...Computer science or software engineeringMore than half, but not all, the time20 to 99 employees...Strongly disagreeMaleA master's degreeWhite or of European descentSomewhat agreeSomewhat agreeDisagreeStrongly agreeNaN37500.0
23Professional developerYes, bothUnited KingdomNoEmployed full-timeBachelor's degreeComputer science or software engineeringLess than half the time, but at least one day ...10,000 or more employees...DisagreeMaleA professional degreeWhite or of European descentSomewhat agreeAgreeDisagreeAgree113750.0NaN
34Professional non-developer who sometimes write...Yes, bothUnited StatesNoEmployed full-timeDoctoral degreeA non-computer-focused engineering disciplineLess than half the time, but at least one day ...10,000 or more employees...DisagreeMaleA doctoral degreeWhite or of European descentAgreeAgreeSomewhat agreeStrongly agreeNaNNaN
45Professional developerYes, I program as a hobbySwitzerlandNoEmployed full-timeMaster's degreeComputer science or software engineeringNever10 to 19 employees...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", - "

5 rows × 154 columns

\n", - "
" - ], - "text/plain": [ - " Respondent Professional \\\n", - "0 1 Student \n", - "1 2 Student \n", - "2 3 Professional developer \n", - "3 4 Professional non-developer who sometimes write... \n", - "4 5 Professional developer \n", - "\n", - " ProgramHobby Country University \\\n", - "0 Yes, both United States No \n", - "1 Yes, both United Kingdom Yes, full-time \n", - "2 Yes, both United Kingdom No \n", - "3 Yes, both United States No \n", - "4 Yes, I program as a hobby Switzerland No \n", - "\n", - " EmploymentStatus \\\n", - "0 Not employed, and not looking for work \n", - "1 Employed part-time \n", - "2 Employed full-time \n", - "3 Employed full-time \n", - "4 Employed full-time \n", - "\n", - " FormalEducation \\\n", - "0 Secondary school \n", - "1 Some college/university study without earning ... \n", - "2 Bachelor's degree \n", - "3 Doctoral degree \n", - "4 Master's degree \n", - "\n", - " MajorUndergrad \\\n", - "0 NaN \n", - "1 Computer science or software engineering \n", - "2 Computer science or software engineering \n", - "3 A non-computer-focused engineering discipline \n", - "4 Computer science or software engineering \n", - "\n", - " HomeRemote \\\n", - "0 NaN \n", - "1 More than half, but not all, the time \n", - "2 Less than half the time, but at least one day ... \n", - "3 Less than half the time, but at least one day ... \n", - "4 Never \n", - "\n", - " CompanySize ... StackOverflowMakeMoney Gender \\\n", - "0 NaN ... Strongly disagree Male \n", - "1 20 to 99 employees ... Strongly disagree Male \n", - "2 10,000 or more employees ... Disagree Male \n", - "3 10,000 or more employees ... Disagree Male \n", - "4 10 to 19 employees ... NaN NaN \n", - "\n", - " HighestEducationParents Race SurveyLong \\\n", - "0 High school White or of European descent Strongly disagree \n", - "1 A master's degree White or of European descent Somewhat agree \n", - "2 A professional degree White or of European descent Somewhat agree \n", - "3 A doctoral degree White or of European descent Agree \n", - "4 NaN NaN NaN \n", - "\n", - " QuestionsInteresting QuestionsConfusing InterestedAnswers Salary \\\n", - "0 Strongly agree Disagree Strongly agree NaN \n", - "1 Somewhat agree Disagree Strongly agree NaN \n", - "2 Agree Disagree Agree 113750.0 \n", - "3 Agree Somewhat agree Strongly agree NaN \n", - "4 NaN NaN NaN NaN \n", - "\n", - " ExpectedSalary \n", - "0 NaN \n", - "1 37500.0 \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "\n", - "[5 rows x 154 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" + "output_type": "error", + "ename": "FileNotFoundError", + "evalue": "[Errno 2] File ./survey_results_public.csv does not exist: './survey_results_public.csv'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matplotlib'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'./survey_results_public.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 674\u001b[0m )\n\u001b[1;32m 675\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 676\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 678\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 446\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 448\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp_or_buf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 449\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 878\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"has_index_names\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"has_index_names\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 879\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 880\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 881\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 882\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 1112\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"c\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"c\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1114\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1115\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1116\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"python\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1889\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"usecols\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1890\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1891\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1892\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munnamed_cols\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munnamed_cols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1893\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] File ./survey_results_public.csv does not exist: './survey_results_public.csv'" + ] } ], "source": [ @@ -293,7 +63,7 @@ "#In this case, we want to look at bootcamp data\n", "#First - let's just look at how many people took a bootcamp in the dataset\n", "\n", - "bootcamp_df = df[df['TimeAfterBootcamp'].isnull()==False]\n", + "bootcamp_df = df[df['TimeAfterBootcamp'].isnull()==False] #Sample change only\n", "not_bootcamp_df = df[df['TimeAfterBootcamp'].isnull()==True] \n", "bootcamp_df.shape" ] @@ -750,9 +520,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.7.6-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/PredictSalary.ipynb b/PredictSalary.ipynb index cd285b5..e340504 100644 --- a/PredictSalary.ipynb +++ b/PredictSalary.ipynb @@ -274,9 +274,23 @@ "%matplotlib inline\n", "\n", "df = pd.read_csv('./survey_results_public.csv')\n", - "df.head()" + "df.head() #Sample change for GitHub" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 4, @@ -5774,9 +5788,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.7.6-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file