diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..be4df9b --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,5 @@ +# These are supported funding model platforms + +github: hardlyhuman +patreon: gsriharsha +custom: ["https://www.paypal.me/SRIHARSHAGAJAVALLI"] diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..dd84ea7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md new file mode 100644 index 0000000..48d5f81 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,10 @@ +--- +name: Custom issue template +about: Describe this issue template's purpose here. 
+title: '' +labels: '' +assignees: '' + +--- + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/workflows/django.yml b/.github/workflows/django.yml new file mode 100644 index 0000000..c806047 --- /dev/null +++ b/.github/workflows/django.yml @@ -0,0 +1,30 @@ +name: Django CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: [3.6, 3.7, 3.8] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Run Tests + run: | + python manage.py test diff --git a/iids/analyzer/Feature_Extraction.py b/iids/analyzer/Feature_Extraction.py new file mode 100644 index 0000000..3ac22c9 --- /dev/null +++ b/iids/analyzer/Feature_Extraction.py @@ -0,0 +1,79 @@ +#importing libraries +import numpy as np +import pandas as pd +import os +import matplotlib as mpl +import matplotlib.pyplot as plt +print(os.listdir("/")) + +#change 
it according to your directory
+df = pd.read_csv('/Intelligent-Intrusion-Detection-System/datasets/kddcup.data.gz')
+print(df.shape)  #shape is an attribute (a tuple); calling it raised TypeError
+
+df.columns =["duration","protocol_type","service","flag","src_bytes",
+             "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
+             "logged_in","num_compromised","root_shell","su_attempted","num_root",
+             "num_file_creations","num_shells","num_access_files","num_outbound_cmds",
+             "is_host_login","is_guest_login","count","srv_count","serror_rate",
+             "srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
+             "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count",
+             "dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
+             "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate",
+             "dst_host_rerror_rate","dst_host_srv_rerror_rate","label"]
+
+#Pre-processing of categorical data columns - done before X is sliced, otherwise X keeps the raw strings
+from sklearn import preprocessing
+le = preprocessing.LabelEncoder()
+df['protocol_type'] = le.fit_transform(df['protocol_type'])
+df['service']= le.fit_transform(df['service'])
+df['flag'] = le.fit_transform(df['flag'])
+
+X = df.iloc[:,:41]
+y = df.iloc[:,-1]
+X.head()
+
+#Splitting of data
+from sklearn.model_selection import train_test_split
+X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 42,test_size = 0.3)
+
+#Scaling of data
+from sklearn.preprocessing import MinMaxScaler
+scaler = MinMaxScaler(feature_range=(0, 1))
+X_train_scaled = scaler.fit_transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+
+from sklearn.feature_selection import RFECV
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import StratifiedKFold
+
+#You can easily change the model by replacing the estimator here
+from sklearn.ensemble import RandomForestClassifier
+model1 = RandomForestClassifier(n_estimators=100, max_depth=2,random_state=0,class_weight='balanced')
+
+rfecv = RFECV(estimator=model1, step=1, cv=StratifiedKFold(2), scoring='accuracy' )
+rfecv.fit(X_train_scaled,y_train)
+#grid_scores_ was removed in scikit-learn 1.2; prefer cv_results_ (1.0+) and fall back on older releases
+cv_scores = rfecv.cv_results_['mean_test_score'] if hasattr(rfecv, 'cv_results_') else rfecv.grid_scores_
+print('Optimal number of features: {}'.format(rfecv.n_features_))
+
+#Feature Selection plot
+plt.figure(figsize=(16, 9))
+plt.title('Recursive Feature Elimination with Cross-Validation', fontsize=18, fontweight='bold', pad=20)
+plt.xlabel('Number of features selected', fontsize=14, labelpad=20)
+plt.ylabel('% Correct Classification', fontsize=14, labelpad=20)
+plt.plot(range(1, len(cv_scores) + 1), cv_scores, color='#303F9F', linewidth=3)
+
+plt.show()
+#estimator_ is fitted on the selected features only, so pair its importances with the columns kept in support_
+dset = pd.DataFrame()
+dset['attr'] = X.columns[rfecv.support_]
+dset['importance'] = rfecv.estimator_.feature_importances_
+dset = dset.sort_values(by='importance', ascending=False)
+
+#Feature Ranking plot
+plt.figure(figsize=(16, 14))
+plt.barh(y=dset['attr'], width=dset['importance'], color='#1976D2')
+plt.title('RFECV - Feature Importances', fontsize=20, fontweight='bold', pad=20)
+plt.xlabel('Importance', fontsize=14, labelpad=20)
+plt.show()
+
diff --git a/iids/analyzer/model_predictions/Adaboost.py b/iids/analyzer/model_predictions/Adaboost.py
new file mode 100644
index 0000000..1376e00
--- /dev/null
+++ b/iids/analyzer/model_predictions/Adaboost.py
@@ -0,0 +1,47 @@
+
+import numpy as np
+import pandas as pd
+import os
+print(os.listdir("/"))
+
+df = pd.read_csv('/mnt/d/GSOC/Intelligent-Intrusion-Detection-System/datasets/kddcup.data.gz')
+##Change according to the directory of the cloned repo w.r.t dataset location.
+
+df.columns =["duration","protocol_type","service","flag","src_bytes",
+             "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
+             "logged_in","num_compromised","root_shell","su_attempted","num_root",
+             "num_file_creations","num_shells","num_access_files","num_outbound_cmds",
+             "is_host_login","is_guest_login","count","srv_count","serror_rate",
+             "srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
+             "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count",
+             "dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
+             "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate",
+             "dst_host_rerror_rate","dst_host_srv_rerror_rate","label"]
+
+
+from sklearn.preprocessing import LabelEncoder,OneHotEncoder
+le = LabelEncoder()
+df['protocol_type'] = le.fit_transform(df['protocol_type'])
+df['service']= le.fit_transform(df['service'])
+df['flag'] = le.fit_transform(df['flag'])
+
+X = df.iloc[:,:41]
+y = df.iloc[:,-1]
+
+from sklearn.model_selection import train_test_split
+X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 42,test_size = 0.4)
+
+from sklearn.preprocessing import MinMaxScaler
+scaler = MinMaxScaler(feature_range=(0, 1))
+X_train_scaled = scaler.fit_transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+
+
+#Fit and score on the min-max scaled matrices built above (they were computed but never used)
+from sklearn.ensemble import AdaBoostClassifier
+ab_model = AdaBoostClassifier()
+ab_model.fit(X_train_scaled,y_train)
+
+score = ab_model.score(X_test_scaled, y_test)
+print(f'The score by AdaBoostClassifier is {score}')
+
diff --git a/iids/analyzer/model_predictions/Decision_tree.py b/iids/analyzer/model_predictions/Decision_tree.py
new file mode 100644
index 0000000..8b99c38
--- /dev/null
+++ b/iids/analyzer/model_predictions/Decision_tree.py
@@ -0,0 +1,45 @@
+
+import numpy as np
+import pandas as pd
+import os
+print(os.listdir("/"))
+
+df = pd.read_csv('/mnt/d/GSOC/Intelligent-Intrusion-Detection-System/datasets/kddcup.data.gz')
+##Change 
according to the directory of the cloned repo w.r.t dataset location.
+
+df.columns =["duration","protocol_type","service","flag","src_bytes",
+             "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
+             "logged_in","num_compromised","root_shell","su_attempted","num_root",
+             "num_file_creations","num_shells","num_access_files","num_outbound_cmds",
+             "is_host_login","is_guest_login","count","srv_count","serror_rate",
+             "srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
+             "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count",
+             "dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
+             "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate",
+             "dst_host_rerror_rate","dst_host_srv_rerror_rate","label"]
+
+
+from sklearn.preprocessing import LabelEncoder,OneHotEncoder
+le = LabelEncoder()
+df['protocol_type'] = le.fit_transform(df['protocol_type'])
+df['service']= le.fit_transform(df['service'])
+df['flag'] = le.fit_transform(df['flag'])
+
+X = df.iloc[:,:41]
+y = df.iloc[:,-1]
+
+from sklearn.model_selection import train_test_split
+X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 42,test_size = 0.4)
+
+from sklearn.preprocessing import MinMaxScaler
+scaler = MinMaxScaler(feature_range=(0, 1))
+X_train_scaled = scaler.fit_transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+#Fit and score on the min-max scaled matrices built above (they were computed but never used)
+from sklearn.tree import DecisionTreeClassifier
+dt_model = DecisionTreeClassifier(max_depth=5)
+dt_model.fit(X_train_scaled,y_train)
+
+score = dt_model.score(X_test_scaled, y_test)
+print(f'The score by DecisionTreeClassifier is {score}')
+
diff --git a/iids/analyzer/model_predictions/knn.py b/iids/analyzer/model_predictions/knn.py
new file mode 100644
index 0000000..6264a04
--- /dev/null
+++ b/iids/analyzer/model_predictions/knn.py
@@ -0,0 +1,45 @@
+
+import numpy as np
+import pandas as pd
+import os
+print(os.listdir("/"))
+
+df = 
pd.read_csv('/mnt/d/GSOC/Intelligent-Intrusion-Detection-System/datasets/kddcup.data.gz')
+##Change according to the directory of the cloned repo w.r.t dataset location.
+
+df.columns =["duration","protocol_type","service","flag","src_bytes",
+             "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
+             "logged_in","num_compromised","root_shell","su_attempted","num_root",
+             "num_file_creations","num_shells","num_access_files","num_outbound_cmds",
+             "is_host_login","is_guest_login","count","srv_count","serror_rate",
+             "srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
+             "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count",
+             "dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
+             "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate",
+             "dst_host_rerror_rate","dst_host_srv_rerror_rate","label"]
+
+
+from sklearn.preprocessing import LabelEncoder,OneHotEncoder
+le = LabelEncoder()
+df['protocol_type'] = le.fit_transform(df['protocol_type'])
+df['service']= le.fit_transform(df['service'])
+df['flag'] = le.fit_transform(df['flag'])
+
+X = df.iloc[:,:41]
+y = df.iloc[:,-1]
+
+from sklearn.model_selection import train_test_split
+X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 42,test_size = 0.4)
+
+from sklearn.preprocessing import MinMaxScaler
+scaler = MinMaxScaler(feature_range=(0, 1))
+X_train_scaled = scaler.fit_transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+#KNN is distance based, so fit and score on the min-max scaled matrices (they were computed but never used)
+# n_neighbors are set to 3, after getting max accuracy
+from sklearn.neighbors import KNeighborsClassifier
+knn_model = KNeighborsClassifier(n_neighbors=3)
+knn_model.fit(X_train_scaled,y_train)
+
+score = knn_model.score(X_test_scaled, y_test)
+print(f'The score by KNClassifier is {score}')
diff --git a/iids/analyzer/model_predictions/random_forest.py b/iids/analyzer/model_predictions/random_forest.py
new file mode 100644
index 0000000..ac8e867
--- /dev/null
+++ 
b/iids/analyzer/model_predictions/random_forest.py
@@ -0,0 +1,45 @@
+
+
+import numpy as np
+import pandas as pd
+import os
+print(os.listdir("/"))
+
+df = pd.read_csv('/mnt/d/GSOC/Intelligent-Intrusion-Detection-System/datasets/kddcup.data.gz')
+##Change according to the directory of the cloned repo w.r.t dataset location.
+
+df.columns =["duration","protocol_type","service","flag","src_bytes",
+             "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
+             "logged_in","num_compromised","root_shell","su_attempted","num_root",
+             "num_file_creations","num_shells","num_access_files","num_outbound_cmds",
+             "is_host_login","is_guest_login","count","srv_count","serror_rate",
+             "srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
+             "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count",
+             "dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
+             "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate",
+             "dst_host_rerror_rate","dst_host_srv_rerror_rate","label"]
+
+
+from sklearn.preprocessing import LabelEncoder,OneHotEncoder
+le = LabelEncoder()
+df['protocol_type'] = le.fit_transform(df['protocol_type'])
+df['service']= le.fit_transform(df['service'])
+df['flag'] = le.fit_transform(df['flag'])
+
+X = df.iloc[:,:41]
+y = df.iloc[:,-1]
+
+from sklearn.model_selection import train_test_split
+X_train,X_test,y_train,y_test = train_test_split(X,y,random_state = 42,test_size = 0.4)
+
+from sklearn.preprocessing import MinMaxScaler
+scaler = MinMaxScaler(feature_range=(0, 1))
+X_train_scaled = scaler.fit_transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+#Fit and score on the min-max scaled matrices built above (they were computed but never used)
+from sklearn.ensemble import RandomForestClassifier
+rf_model = RandomForestClassifier(n_estimators=100, max_depth=2,random_state=0,class_weight='balanced')
+rf_model.fit(X_train_scaled,y_train)
+
+score = rf_model.score(X_test_scaled, y_test)
+print(f'The score by Random Forest is {score}')
diff --git a/iids/analyzer/serializers.py 
b/iids/analyzer/serializers.py
new file mode 100644
index 0000000..b4a8831
--- /dev/null
+++ b/iids/analyzer/serializers.py
@@ -0,0 +1,29 @@
+#Serializers for input
+from io import BytesIO
+
+from rest_framework import serializers
+from rest_framework.parsers import JSONParser
+from rest_framework.renderers import JSONRenderer
+
+
+class EndpointSerializer(serializers.ModelSerializer):
+    #NOTE(review): DRF reads model/fields/read_only_fields from an inner Meta
+    #class; no model is visible in this change, so this is left as is - confirm.
+    read_only_fields = ("model_name", "param1", "param2", "param3")
+
+
+#The round trip lives in functions so that importing this module no longer runs code on an undefined 'data'
+def serialize_endpoint(instance):
+    """Serialize instance with EndpointSerializer and return the rendered JSON."""
+    serializer = EndpointSerializer(instance)
+    return JSONRenderer().render(serializer.data)
+
+
+#Deserializing of data
+def deserialize_endpoint(payload):
+    """Parse rendered JSON (bytes) back into Python data."""
+    #JSONRenderer returns bytes on Python 3, so wrap them in BytesIO; the
+    #Python 2 only StringIO module cannot be imported there.
+    stream = BytesIO(payload)
+    return JSONParser().parse(stream)
+
diff --git a/iids/analyzer/views.py b/iids/analyzer/views.py
index 91ea44a..07460e1 100644
--- a/iids/analyzer/views.py
+++ b/iids/analyzer/views.py
@@ -1,3 +1,56 @@
 from django.shortcuts import render
+from django.http import HttpResponse
+import pandas as pd
+from sklearn.preprocessing import LabelEncoder
 
 # Create your views here.
+#loading our trained model
+#NOTE(review): load_model is not imported in this module - presumably the Keras
+#loader, given the .hdf5 file; confirm and add the import.
+print(" Model loading.......")
+model = load_model('attack_labe.hdf5') #after training #TODO
+print("Model loaded!!")
+
+
+#class Ml_Algo and functions
+
+class Random_Forest_Classifier():
+    """Pair a stored model with the preprocessing its input needs."""
+
+    def __init__(self):
+        #TODO: default_path is not used yet and load_model gets an empty path
+        default_path = "//"
+        self.model = load_model("")
+
+    def preprocessing(self, input_data):
+        """Read the CSV at input_data and label-encode its categorical columns.
+
+        Returns the prepared DataFrame.
+        """
+        #read the path that was passed in, not a file literally named 'input_data'
+        df = pd.read_csv(input_data)
+        df.columns =["duration","protocol_type","service","flag","src_bytes", "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
+        "logged_in","num_compromised","root_shell","su_attempted","num_root","num_file_creations","num_shells","num_access_files","num_outbound_cmds",
+        "is_host_login","is_guest_login","count","srv_count","serror_rate","srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
+        "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count","dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
+        "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate","dst_host_rerror_rate","dst_host_srv_rerror_rate","label"]
+        le = LabelEncoder()
+        df['protocol_type'] = le.fit_transform(df['protocol_type'])
+        df['service']= le.fit_transform(df['service'])
+        df['flag'] = le.fit_transform(df['flag'])
+
+        #hand back the processed frame; returning input_data discarded the work above
+        return df
+
+    def prediction(self, input_data=None):
+        """Preprocess input_data and return the model's prediction.
+
+        Any exception is reported as {"status": "Error", "message": <text>}.
+        """
+        try:
+            input_data = self.preprocessing(input_data)
+            prediction = self.model.predict(input_data)
+        except Exception as e:
+            return {"status": "Error", "message": str(e)}
+
+        return prediction
diff --git a/iids/datasets/kddcup.data.gz b/iids/datasets/kddcup.data.gz
new file mode 100644
index 0000000..247e2d0
Binary files /dev/null and b/iids/datasets/kddcup.data.gz differ
diff --git a/iids/iids/settings.py b/iids/iids/settings.py
index 609fe4c..3976fbe 100644
--- a/iids/iids/settings.py
+++ b/iids/iids/settings.py
@@ -37,8 +37,11 @@
     'django.contrib.sessions',
     'django.contrib.messages',
     'django.contrib.staticfiles',
+    'rest_framework',
+    'analyzer',
 ]
+
 
 MIDDLEWARE = [
     'django.middleware.security.SecurityMiddleware',
     'django.contrib.sessions.middleware.SessionMiddleware',