# Standard data-analysis stack: pandas for tabular data,
# seaborn/matplotlib for plotting.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply one consistent, readable style to every figure in this analysis.
sns.set_theme(style="whitegrid")
print("Libraries loaded successfully.")
# Load results.csv into a pandas DataFrame, fetching it first when running
# inside Google Colab (where the repository checkout is not on disk).
import os
import urllib.request

# Detect Google Colab: the `google.colab` module only exists in that runtime.
try:
    import google.colab  # noqa: F401 - imported only to probe the runtime
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

# In Colab, download the results file from the project's GitHub repository.
if IN_COLAB:
    print("Running in Google Colab - downloading data...")
    os.makedirs("llm_bug_analysis/results", exist_ok=True)
    try:
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/engemkeres/llm-analysis-thesis/main/llm_bug_analysis/results/results.csv",
            "llm_bug_analysis/results/results.csv"
        )
        print("Data downloaded successfully from GitHub.")
    except Exception as e:
        # Best-effort download: report the failure and let the user upload
        # the file manually instead of aborting the whole analysis.
        print(f"Error downloading data: {e}")
        print("Please upload results.csv manually.")

# Load the data, then show a quick preview and the column dtypes.
try:
    df = pd.read_csv("llm_bug_analysis/results/results.csv")
    print("Data loaded successfully. First 5 rows:")
    # `display` only exists inside IPython/Jupyter; fall back to print()
    # so this also works when executed as a plain Python script.
    try:
        display(df.head())
    except NameError:
        print(df.head())
    print("\nData columns and types:")
    df.info()
except FileNotFoundError:
    print("ERROR: results.csv not found.")
    if IN_COLAB:
        print("The automatic download may have failed. Please upload the file manually using:")
        print("from google.colab import files")
        print("uploaded = files.upload()")
    else:
        print("Make sure you have run the analysis pipeline first.")
| timestamp | repo_name | bug_commit_sha | file_path | commit_message | issue_title | issue_body | llm_model | complexity_before_cc | complexity_before_cognitive | ... | complexity_after_llm_avg_params | complexity_after_llm_total_tokens | human_tests_passed | human_lines_added | human_lines_deleted | human_total_diff | complexity_after_human_cc | complexity_after_human_cognitive | complexity_after_human_avg_params | complexity_after_human_total_tokens | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2025-10-11T15:02:54.981034 | mahmoud/boltons | 4815fc8dd1768da5f2d903846d2ab994aa57b0cf | NaN | Test and fix for #348 (#349) | LRU .values() and dict return old entries | Hi,\r\n\r\nFirst of all thanks for the excelle... | manual_llm | 198 | 135 | ... | SKIPPED | SKIPPED | True | 44 | 9 | 53 | 199 | 135 | 1.68 | 5014 |
1 rows × 29 columns
Data columns and types:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 29 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 timestamp 1 non-null object
1 repo_name 1 non-null object
2 bug_commit_sha 1 non-null object
3 file_path 0 non-null float64
4 commit_message 1 non-null object
5 issue_title 1 non-null object
6 issue_body 1 non-null object
7 llm_model 1 non-null object
8 complexity_before_cc 1 non-null int64
9 complexity_before_cognitive 1 non-null int64
10 complexity_before_avg_params 1 non-null float64
11 complexity_before_total_tokens 1 non-null int64
12 llm_patch_applied 1 non-null object
13 llm_tests_passed 1 non-null object
14 ai_lines_added 1 non-null object
15 ai_lines_deleted 1 non-null object
16 ai_total_diff 1 non-null object
17 complexity_after_llm_cc 1 non-null object
18 complexity_after_llm_cognitive 1 non-null object
19 complexity_after_llm_avg_params 1 non-null object
20 complexity_after_llm_total_tokens 1 non-null object
21 human_tests_passed 1 non-null bool
22 human_lines_added 1 non-null int64
23 human_lines_deleted 1 non-null int64
24 human_total_diff 1 non-null int64
25 complexity_after_human_cc 1 non-null int64
26 complexity_after_human_cognitive 1 non-null int64
27 complexity_after_human_avg_params 1 non-null float64
28 complexity_after_human_total_tokens 1 non-null int64
dtypes: bool(1), float64(3), int64(9), object(16)
memory usage: 357.0+ bytes
# Convert the LLM test outcome into a boolean success flag.
# Rows marked 'SKIPPED' never had an LLM fix attempted, so exclude them first.
df_ai = df[df['llm_tests_passed'] != 'SKIPPED'].copy()
if df_ai.empty:
    print("No LLM results found in the data. Skipping AI success rate calculation.")
else:
    # BUG FIX: the column is object-dtype (mixed bools / 'True'/'False'
    # strings after the CSV round-trip), and .astype(bool) maps EVERY
    # non-empty string to True — including 'False' — inflating the success
    # rate to 100%. Test membership against the genuine truthy values instead.
    df_ai['llm_success'] = df_ai['llm_tests_passed'].isin([True, 'True'])
    overall_success_rate = df_ai['llm_success'].mean()
    print(f"Overall LLM Success Rate (where attempted): {overall_success_rate:.2%}")
# Group data by repository and compute/plot the LLM test pass rate for each.
if df_ai.empty:
    print("No LLM results found in the data. Skipping the repository summary plot.")
else:
    # This part only runs if there is data about the LLM fix.
    # Mean of the boolean success flag == pass rate; highest repos first.
    success_by_repo = df_ai.groupby('repo_name')['llm_success'].mean().sort_values(ascending=False)
    print("LLM Success Rate by Repository:")
    print(success_by_repo.apply('{:.2%}'.format))
    # Bar chart visualising the per-repository pass rate.
    plt.figure(figsize=(10, 6))
    success_by_repo.plot(kind='bar')
    plt.title('LLM Test Pass Rate by Repository')
    plt.ylabel('Success Rate')
    plt.xlabel('Repository')
    plt.xticks(rotation=45, ha='right')  # slant long repo names so they stay legible
    # A callable here is wrapped in FuncFormatter by matplotlib; str.format
    # silently ignores the extra `pos` argument it receives.
    plt.gca().yaxis.set_major_formatter('{:.0%}'.format)
    plt.tight_layout()
    plt.show()