|  | { | 
|  | "cells": [ | 
|  | { | 
|  | "cell_type": "code", | 
|  | "execution_count": null, | 
|  | "metadata": {}, | 
|  | "outputs": [], | 
|  | "source": [ | 
|  | "import pandas as pd\n", | 
|  | "import numpy as np\n", | 
|  | "from pprint import pprint\n", | 
|  | "from collections import Counter\n", | 
|  | "import common\n", | 
|  | "import math" | 
|  | ] | 
|  | }, | 
|  | { | 
|  | "cell_type": "code", | 
|  | "execution_count": null, | 
|  | "metadata": {}, | 
|  | "outputs": [], | 
|  | "source": [ | 
|  | "# Load the classifier's commit list (path is relative to the working directory).\n", | 
|  | "commit_list_df = pd.read_csv(\"results/classifier/commitlist.csv\")\n", | 
|  | "\n", | 
|  | "# Rows still marked 'Uncategorized' whose topic is anything other than 'not user facing'.\n", | 
|  | "needs_triage = commit_list_df[\n", | 
|  | "    (commit_list_df[\"category\"] == 'Uncategorized')\n", | 
|  | "    & (commit_list_df[\"topic\"] != 'not user facing')\n", | 
|  | "]\n", | 
|  | "\n", | 
|  | "# Tally how many such commits each author has.\n", | 
|  | "mean_authors = needs_triage[\"author\"].tolist()\n", | 
|  | "counts = Counter(mean_authors)\n", | 
|  | "\n", | 
|  | "# Preview the first rows of the full commit list.\n", | 
|  | "commit_list_df.head()" | 
|  | ] | 
|  | }, | 
|  | { | 
|  | "cell_type": "code", | 
|  | "execution_count": null, | 
|  | "metadata": {}, | 
|  | "outputs": [], | 
|  | "source": [ | 
|  | "# Summary statistics for the 'category' column of the commit list.\n", | 
|  | "commit_list_df[\"category\"].describe()" | 
|  | ] | 
|  | }, | 
|  | { | 
|  | "cell_type": "code", | 
|  | "execution_count": null, | 
|  | "metadata": {}, | 
|  | "outputs": [], | 
|  | "source": [ | 
|  | "# Number of commits that are still 'Uncategorized', excluding those whose topic is 'not user facing'.\n", | 
|  | "no_category = commit_list_df.loc[\n", | 
|  | "    commit_list_df[\"category\"].eq('Uncategorized')\n", | 
|  | "    & commit_list_df[\"topic\"].ne('not user facing')\n", | 
|  | "]\n", | 
|  | "print(no_category.shape[0])" | 
|  | ] | 
|  | }, | 
|  | { | 
|  | "cell_type": "code", | 
|  | "execution_count": null, | 
|  | "metadata": {}, | 
|  | "outputs": [], | 
|  | "source": [ | 
|  | "# Check whether a given commit (e.g. a cherry-pick) appears in the commit list.\n", | 
|  | "example_sha = '55c76baf579cb6593f87d1a23e9a49afeb55f15a'\n", | 
|  | "commit_hashes = set(commit_list_df[\"commit_hash\"].tolist())\n", | 
|  | "\n", | 
|  | "# Only the first 11 characters of the SHA are compared.\n", | 
|  | "# NOTE(review): presumably the CSV stores 11-character abbreviated hashes - confirm.\n", | 
|  | "short_sha = example_sha[:11]\n", | 
|  | "short_sha in commit_hashes" | 
|  | ] | 
|  | }, | 
|  | { | 
|  | "cell_type": "code", | 
|  | "execution_count": null, | 
|  | "metadata": {}, | 
|  | "outputs": [], | 
|  | "source": [ | 
|  | "# Categories that occur in the commit list but are not in common.categories.\n", | 
|  | "seen_categories = set(commit_list_df[\"category\"].tolist())\n", | 
|  | "diff_categories = seen_categories.difference(common.categories)\n", | 
|  | "\n", | 
|  | "# Report how many unexpected categories there are, then list them.\n", | 
|  | "print(len(diff_categories))\n", | 
|  | "pprint(diff_categories)" | 
|  | ] | 
|  | }, | 
|  | { | 
|  | "cell_type": "code", | 
|  | "execution_count": null, | 
|  | "metadata": {}, | 
|  | "outputs": [], | 
|  | "source": [ | 
|  | "# Counts of categories: number of commits per category, most frequent first.\n", | 
|  | "# dropna=False keeps commits with a missing category visible in the tally.\n", | 
|  | "commit_list_df.category.value_counts(dropna=False)" | 
|  | ] | 
|  | } | 
|  | ], | 
|  | "metadata": { | 
|  | "kernelspec": { | 
|  | "display_name": "Python 3", | 
|  | "language": "python", | 
|  | "name": "python3" | 
|  | }, | 
|  | "language_info": { | 
|  | "codemirror_mode": { | 
|  | "name": "ipython", | 
|  | "version": 3 | 
|  | }, | 
|  | "file_extension": ".py", | 
|  | "mimetype": "text/x-python", | 
|  | "name": "python", | 
|  | "nbconvert_exporter": "python", | 
|  | "pygments_lexer": "ipython3" | 
|  | }, | 
|  | "vscode": { | 
|  | "interpreter": { | 
|  | "hash": "a867c59af434d7534e61ccb37014830daefd5fcd3816cab68d595dde5e446f52" | 
|  | } | 
|  | } | 
|  | }, | 
|  | "nbformat": 4, | 
|  | "nbformat_minor": 2 | 
|  | } |