customer_id,gender,age,income,churn
1,Мужской,25,50000,0
2,Женский,30,60000,1
3,Женский,,52000,0
4,Мужской,45,80000,1
5,,35,72000,0
<...пропущено для краткости...>
95,Женский,27,52000,0
96,Мужской,,70000,0
97,Женский,,69000,1
98,Мужской,33,66000,0
99,,45,83000,1
100,Мужской,25,52000,0
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
# Train and evaluate a logistic-regression churn classifier on customer_churn.csv.
# Columns used below: customer_id, gender, age, income, churn.
data = pd.read_csv("customer_churn.csv")

# customer_id is an arbitrary row identifier; feeding it to the model only adds
# a spurious numeric feature, so it is excluded together with the target.
X = data.drop(columns=["customer_id", "churn"])
y = data["churn"]

# Split BEFORE any imputation so that statistics used to fill gaps are derived
# from the training rows only and nothing from the test rows leaks into them.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Rows are independent customers, so forward-filling from the previous row
# (the former `fillna(method='ffill')`, itself deprecated since pandas 2.1)
# copied values between unrelated people and left a leading NaN untouched.
# Use the training median for numeric columns and the training mode otherwise.
numeric_cols = X_train.select_dtypes(include="number").columns
fill_values = X_train[numeric_cols].median().dropna().to_dict()
for col in X_train.columns.difference(numeric_cols):
    modes = X_train[col].mode()
    if not modes.empty:
        fill_values[col] = modes.iloc[0]
X_train = X_train.fillna(fill_values)
X_test = X_test.fillna(fill_values)

# One-hot encode on the training set (dropping the first level of each
# categorical column), then project the test set onto exactly the same
# columns. The test set is encoded WITHOUT drop_first: it may contain a
# different subset of categories, and reindexing to the training columns is
# what guarantees a consistent layout.
X_train = pd.get_dummies(X_train, drop_first=True)
X_test = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

model = LogisticRegression(solver="liblinear")
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation on the held-out 20% of rows.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print(f"Верно угаданные метки (accuracy): {accuracy:.4f}")
print(f"Точность: {precision:.4f}")
print(f"Полнота: {recall:.4f}")
print(f"F1-мера: {f1:.4f}")
print(f"Матрица ошибок:\n {cm}")
Верно угаданные метки (accuracy): 0.9000
Точность: 0.8571
Полнота: 1.0000
F1-мера: 0.9231
Матрица ошибок:
 [[ 6  2]
 [ 0 12]]