Finish separating the front end from the back end

This commit is contained in:
hugo.pradier2
2024-06-23 17:44:26 +02:00
parent 15e1674cb2
commit 7dafa78bc4
13 changed files with 201 additions and 131 deletions

View File

@@ -0,0 +1,45 @@
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
def perform_classification(data, data_name, target_name, test_size):
    """Train a logistic-regression classifier on selected columns of *data*.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table containing both the features and the target.
    data_name : list[str]
        Names of the feature columns.
    target_name : str
        Name of the categorical target column.
    test_size : float
        Fraction of rows held out to estimate accuracy.

    Returns
    -------
    tuple
        ``(model, label_encoders, accuracy)`` — the fitted model, a dict of
        the LabelEncoder used per encoded column (the target's encoder, if
        any, is stored under ``target_name``), and the hold-out accuracy.

    Raises
    ------
    ValueError
        If the target is numeric with more than 10 distinct values
        (it then looks continuous, i.e. a regression target).
    """
    # Copy the feature frame so the label encoding below does not write
    # encoded integers back into the caller's DataFrame (the original code
    # assigned into a view of ``data``).
    X = data[data_name].copy()
    y = data[target_name]
    label_encoders = {}
    # Encode every non-numeric feature column to integer codes.
    for column in X.select_dtypes(include=['object']).columns:
        le = LabelEncoder()
        X[column] = le.fit_transform(X[column])
        label_encoders[column] = le
    if y.dtype == 'object':
        le = LabelEncoder()
        y = le.fit_transform(y)
        label_encoders[target_name] = le
    else:
        # A numeric target with many distinct values is almost certainly
        # continuous, which logistic regression cannot handle.
        if y.nunique() > 10:
            raise ValueError("The target variable seems to be continuous. Please select a categorical target for classification.")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    # The default max_iter=100 frequently stops before convergence on
    # label-encoded data; 1000 keeps results identical when 100 sufficed.
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return model, label_encoders, accuracy
def make_prediction(model, label_encoders, data_name, target_name, input_values):
    """Encode one row of raw feature values, predict, and decode the result.

    ``input_values`` must be ordered like ``data_name``.  Any feature that
    has an encoder in ``label_encoders`` is mapped through it before the
    model is called; if the target was encoded, the predicted code is mapped
    back to its original label.
    """
    encoded_row = [
        label_encoders[feature].transform([value])[0]
        if feature in label_encoders
        else value
        for feature, value in zip(data_name, input_values)
    ]
    prediction = model.predict([encoded_row])
    if target_name in label_encoders:
        prediction = label_encoders[target_name].inverse_transform(prediction)
    return prediction[0]

View File

@@ -0,0 +1,17 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
def perform_dbscan_clustering(data, data_name, eps, min_samples):
    """Run DBSCAN on the selected columns and return a scatter-plot figure.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table.
    data_name : list[str]
        Column names to cluster on: exactly 2 produces a 2-D plot,
        anything else is plotted in 3-D using the first three columns.
    eps, min_samples
        Forwarded unchanged to ``sklearn.cluster.DBSCAN``.

    Returns
    -------
    matplotlib.figure.Figure
        Figure with the points coloured by their cluster label
        (noise points get DBSCAN's label -1).
    """
    x = data[data_name].to_numpy()
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    y_dbscan = dbscan.fit_predict(x)
    fig = plt.figure()
    if len(data_name) == 2:
        ax = fig.add_subplot(projection='rectilinear')
        # Draw on the created axes explicitly; the original called
        # plt.scatter, silently relying on this figure being "current"
        # and leaving ``ax`` unused — inconsistent with the 3-D branch.
        ax.scatter(x[:, 0], x[:, 1], c=y_dbscan, s=50, cmap="viridis")
    else:
        ax = fig.add_subplot(projection='3d')
        ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y_dbscan, s=50, cmap="viridis")
    return fig

View File

@@ -0,0 +1,21 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
def perform_kmeans_clustering(data, data_name, n_clusters, n_init, max_iter):
    """Run k-means on the selected columns and return a scatter-plot figure.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table.
    data_name : list[str]
        Column names to cluster on: exactly 2 produces a 2-D plot,
        anything else is plotted in 3-D using the first three columns.
    n_clusters, n_init, max_iter : int
        Forwarded to ``sklearn.cluster.KMeans`` (random init,
        fixed random_state=111 for reproducibility).

    Returns
    -------
    matplotlib.figure.Figure
        Figure with points coloured by cluster and centroids drawn
        as black "X" markers.
    """
    x = data[data_name].to_numpy()
    kmeans = KMeans(n_clusters=n_clusters, init="random", n_init=n_init, max_iter=max_iter, random_state=111)
    y_kmeans = kmeans.fit_predict(x)
    fig = plt.figure()
    centers = kmeans.cluster_centers_
    if len(data_name) == 2:
        ax = fig.add_subplot(projection='rectilinear')
        # Draw on the created axes explicitly; the original called
        # plt.scatter, silently relying on this figure being "current"
        # and leaving ``ax`` unused — inconsistent with the 3-D branch.
        ax.scatter(x[:, 0], x[:, 1], c=y_kmeans, s=50, cmap="viridis")
        ax.scatter(centers[:, 0], centers[:, 1], c="black", s=200, marker="X")
    else:
        ax = fig.add_subplot(projection='3d')
        ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y_kmeans, s=50, cmap="viridis")
        ax.scatter(centers[:, 0], centers[:, 1], centers[:, 2], c="black", s=200, marker="X")
    return fig

View File

@@ -0,0 +1,18 @@
from sklearn.linear_model import LinearRegression
def perform_regression(data, data_name, target_name):
    """Fit ordinary least squares of ``data[target_name]`` on ``data[data_name]``.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table containing features and target.
    data_name : list[str]
        Names of the feature columns.
    target_name : str
        Name of the numeric target column.

    Returns
    -------
    sklearn.linear_model.LinearRegression
        The fitted model.

    Raises
    ------
    ValueError
        If the target column is not numeric.
    """
    X = data[data_name]
    y = data[target_name]
    # Check the column dtype, not isinstance on the first value: pandas
    # integer columns hold np.int64, which is NOT a subclass of builtin
    # int, so the original check wrongly rejected valid integer targets
    # (and y.iloc[0] would crash on an empty frame).
    if y.dtype.kind not in "biuf":
        raise ValueError("The target variable should be numeric (continuous) for regression.")
    model = LinearRegression()
    model.fit(X, y)
    return model
def make_prediction(model, feature_names, input_values):
    """Predict a single target value for one observation.

    ``feature_names`` is accepted for interface parity with the
    classification module but is not consulted here; ``input_values``
    is passed to the model as a one-row design matrix.
    """
    return model.predict([input_values])[0]

View File

@@ -0,0 +1,16 @@
import matplotlib.pyplot as plt
import seaborn as sns
def plot_histogram(data, column):
    """Return a new figure containing a 20-bin histogram of ``data[column]``.

    NaN values are dropped before binning; bars are outlined in black.
    """
    values = data[column].dropna()
    figure, axis = plt.subplots()
    axis.hist(values, bins=20, edgecolor='k')
    axis.set_xlabel(column)
    axis.set_ylabel("Frequency")
    axis.set_title(f"Histogram of {column}")
    return figure
def plot_boxplot(data, column):
    """Return a new figure containing a seaborn boxplot of ``data[column]``."""
    figure, axis = plt.subplots()
    sns.boxplot(data=data, x=column, ax=axis)
    axis.set_title(f"Boxplot of {column}")
    return figure