import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import ResidualsPlot
import folium
import json
import requests
import math
import random
import warnings
from matplotlib import cycler
import seaborn as sns

# Notebook-wide setup: silence library warnings so they do not clutter the
# rendered output, and draw every figure with the ggplot style sheet.
warnings.filterwarnings("ignore")
plt.style.use('ggplot')


# Load the raw incident table from the CSV in the working directory.
df = pd.read_csv('globalterrorismdb_0221dist.csv')


df.head()


# Only interested in terrorist attacks this century: drop every row whose
# `iyear` is earlier than 2000.
df = df.loc[~df['iyear'].lt(2000)]


df.head()


# Attack categories to chart; each entry is matched against `attacktype1_txt`.
attack_types = [
    'Facility/Infrastructure Attack',
    'Hijacking',
    'Hostage Taking (Barricade Incident)',
    'Armed Assault',
    'Hostage Taking (Kidnapping)',
    'Assassination',
    'Unarmed Assault',
    'Bombing/Explosion',
    'Unknown',
]


# Number of incidents per category, in the same order as `attack_types`.
# A single value_counts pass replaces one boolean filter per category;
# categories with no incidents are reported as 0.
counts = (
    df['attacktype1_txt']
    .value_counts()
    .reindex(attack_types, fill_value=0)
    .tolist()
)


# Offset the 'Bombing/Explosion' wedge from the rest of the pie. The offsets
# are derived from the label rather than a fixed position so they stay correct
# if `attack_types` is ever reordered.
explode = tuple(0.1 if attack == 'Bombing/Explosion' else 0 for attack in attack_types)
plt.figure(figsize=(20,10))
plt.title("Share of Terrorism Attacks since 2000")
# `explode` was previously computed but never handed to plt.pie, so the
# highlighted wedge was not actually pulled out.
plt.pie(counts, explode=explode, labels=attack_types, autopct='%1.1f%%', pctdistance=1.1, labeldistance=1.3)
plt.show()


# Overall outcome split: incidents with success == 1 versus success == 0.
success = int((df['success'] == 1).sum())
failed = int((df['success'] == 0).sum())
both = [success, failed]
labels = ["Success", "Failed"]

plt.figure(figsize=(20,10))
plt.pie(both, labels=labels, autopct='%1.1f%%')
plt.show()


# Incident counts per attack category, split by outcome. Both lists follow the
# order of `attack_types`; a category with no matching rows counts as 0.
succeeded_by_type = df.loc[df['success'] == 1, 'attacktype1_txt'].value_counts()
failed_by_type = df.loc[df['success'] == 0, 'attacktype1_txt'].value_counts()
successes = succeeded_by_type.reindex(attack_types, fill_value=0).tolist()
failures = failed_by_type.reindex(attack_types, fill_value=0).tolist()

plt.figure(figsize=(20,10))
# The failed bars are drawn second, on top of the success bars, both starting
# from zero (overlaid, not stacked).
plt.bar(attack_types, successes, label="Success")
plt.bar(attack_types, failures, label="Failed")
plt.title("Distribution of Successful Attacks across Attack Types since 2000")
plt.xlabel("Attack Type")
plt.ylabel("Number of Attacks")
plt.legend()
plt.show()


# Incidents attributed to a named group (rows whose `gname` is "Unknown" are dropped).
fatality_by_group = df[df.gname != "Unknown"]

# Total `nkill` per group. groupby gives the same per-group sums as pivoting to
# an (event x group) matrix and summing its columns, without materialising that
# very wide, almost entirely empty matrix. Missing `nkill` values are skipped.
group_fatality_counts = fatality_by_group.groupby('gname')['nkill'].sum()
group_fatality_counts = group_fatality_counts.sort_values(ascending=False)
# Keep the 15 groups with the most fatalities.
group_fatality_counts = group_fatality_counts[0:15]

plt.figure(figsize=(20,10))
ax = group_fatality_counts.plot.bar()
# Title and axis labels are applied after plotting: the pandas bar plot labels
# the x axis with the index name, which would otherwise replace the label below.
plt.title("Fatalities Across Terrorist Groups since 2000")
plt.xlabel("Terrorist Group")
plt.ylabel("Fatalities")
plt.xticks(rotation=90)
plt.show()


# Interactive world map showing a random sample of incidents as markers.
terrorism_map = folium.Map(location = [50.193980, -6.905573], zoom_start = 2)

# Marker colour for each `attacktype1` code; codes not listed here fall back to gray.
marker_colors = {
    1: "red",
    2: "pink",
    3: "black",
    4: "lightblue",
    5: "orange",
    6: "white",
    7: "purple",
    8: "pink",
}

# Sample from the index labels that actually exist in `df`, restricted to rows
# that have coordinates. The previous version sampled from range(0, 131349),
# but after the year filter the index labels no longer start at 0, so many
# sampled numbers matched no row and rows with larger labels could never be
# selected.
located = df.dropna(subset=["latitude", "longitude"])
markers = random.sample(list(located.index), min(500, len(located)))

# Only the sampled rows are visited, instead of scanning every row of `df`
# and testing membership in the sample list.
for idx, row in located.loc[markers].iterrows():
    color = marker_colors.get(row["attacktype1"], "gray")

    # The popup is rendered as HTML, where a bare "\n" collapses to a space;
    # <br> produces the intended line breaks.
    popup_html = (
        "<i>" + str(row["iyear"])
        + "<br>" + str(row["attacktype1_txt"])
        + "<br>in " + str(row["country_txt"]) + "</i>"
    )
    folium.Marker(
        [row["latitude"], row["longitude"]],
        popup = popup_html,
        icon = folium.Icon(color = color, icon = "info-sign"),
        tooltip = "View Details",
    ).add_to(terrorism_map)

terrorism_map


# Region names. NOTE(review): this list is not referenced by any code visible
# in this file; it is kept in case other cells rely on it.
regions = ['Australasia & Oceania',
 'Central America & Caribbean',
 'Central Asia',
 'East Asia',
 'Eastern Europe',
 'Middle East & North Africa',
 'North America',
 'South America',
 'South Asia',
 'Southeast Asia',
 'Sub-Saharan Africa',
 'Western Europe']


# Total `nkill` per region. groupby yields the same per-region sums as pivoting
# to an (event x region) matrix and summing its columns, without building that
# sparse intermediate. Missing `nkill` values are skipped.
region_fatality_counts = df.groupby('region_txt')['nkill'].sum()
region_fatality_counts = region_fatality_counts.sort_values(ascending=False)
region_fatality_counts = region_fatality_counts[0:15]

plt.figure(figsize=(20,10))
region_fatality_counts.plot.bar()
# Title and axis labels are applied after plotting: the pandas bar plot labels
# the x axis with the index name, which would otherwise replace the label below.
plt.title("Number of People Killed in Terrorist Attacks by Region")
plt.xlabel("Regions")
plt.ylabel("Fatalities")
plt.xticks(rotation=90)
plt.show()


# Calendar years covered by the trend charts, one entry per year.
years = list(range(2000, 2020))


def _fatalities_per_year(events, year_range):
    """Return the summed `nkill` of `events` for each year in `year_range`.

    The result is a list of floats aligned with `year_range`; a year with no
    rows yields 0.0. Rows whose `iyear` is not in `year_range` are left out
    (the earlier fixed-size accumulator raised IndexError on a later year).
    """
    totals = events.groupby('iyear')['nkill'].sum()
    return [float(total) for total in totals.reindex(year_range, fill_value=0.0)]


# Regions are selected by their numeric `region` code; the names used here
# follow the legend labels below.
middle_east = df[df['region'] == 10]
middle_east_freq = middle_east.iyear

south_asia = df[df['region'] == 6]
south_asia_freq = south_asia.iyear

africa = df[df['region'] == 11]
africa_freq = africa.iyear

southeast_asia = df[df['region'] == 5]
southeast_asia_freq = southeast_asia.iyear

# Keep only rows with a recorded, non-negative fatality count (this also drops
# rows where `nkill` is missing).
middle_east = middle_east[middle_east.nkill >= 0]
south_asia = south_asia[south_asia.nkill >= 0]
africa = africa[africa.nkill >= 0]
southeast_asia = southeast_asia[southeast_asia.nkill >= 0]

# Yearly fatality totals per region, computed with a vectorised groupby
# instead of accumulating row by row with iterrows.
middle_east_kills = _fatalities_per_year(middle_east, years)
south_asia_kills = _fatalities_per_year(south_asia, years)
africa_kills = _fatalities_per_year(africa, years)
southeast_asia_kills = _fatalities_per_year(southeast_asia, years)


plt.figure(figsize=(20,10))
plt.plot(years, middle_east_kills, color='gray', label='Middle East & North Africa')
plt.plot(years, south_asia_kills, color='orange', label='South Asia')
plt.plot(years, africa_kills, color='green', label='Sub-Saharan Africa')
plt.plot(years, southeast_asia_kills, color='pink', label='Southeast Asia')

plt.legend()


plt.title('Trends of Fatalities from Terrorist Attacks by Region Over Time')
plt.xlabel('Year')
# The plotted quantity is fatalities, not the number of attacks.
plt.ylabel('Number of Fatalities')
plt.xticks([2000, 2005, 2010, 2015, 2020])
plt.show()


# Worldwide fatalities per year, aligned with `years`.
# Only rows with a recorded, non-negative `nkill` are counted.
global_data = df[df.nkill >= 0]

# One vectorised groupby replaces a row-by-row iterrows accumulation over the
# whole table. Years without rows are reported as 0.0, and rows whose year is
# not in `years` are left out rather than raising IndexError.
global_kills = (
    global_data.groupby('iyear')['nkill']
    .sum()
    .reindex(years, fill_value=0.0)
    .astype(float)
    .tolist()
)

plt.figure(figsize=(20,10))
plt.plot(years, global_kills, color='black', label='Global Terrorist Attacks')

plt.legend()

plt.title('Trends of Fatalities from Terrorist Attacks Globally from 2000-2019')
plt.xlabel('Year')
plt.ylabel('Number of Fatalities')
plt.xticks([2000, 2005, 2010, 2015, 2020])
plt.show()


# Shared histogram bin edges: one bin per calendar year present in `df`, each
# centred on its year. With bins=20 matplotlib derived the edges separately
# from every series' own min/max year, so bars of different attack types (and
# the year ticks) were not guaranteed to line up.
first_year = int(df['iyear'].min())
last_year = int(df['iyear'].max())
year_edges = np.arange(first_year, last_year + 2) - 0.5

# Overlaid yearly incident counts, one semi-transparent histogram per attack type.
plt.figure(figsize=(20,10))
plt.title('Trends of Terrorist Attacks by Type Over Time')
plt.xlabel('Year')
# The y axis shows how many incidents fall in each year.
plt.ylabel('Number of Attacks')
plt.xticks([2000, 2005, 2010, 2015, 2020])
colors = ["blue", "orange", "green", "red", "purple", "brown", "pink", "gray", "olive"]
for attack, shade in zip(attack_types, colors):
    type_data = df[df['attacktype1_txt'] == attack]
    freq = type_data.iyear
    plt.hist(freq, bins=year_edges, alpha=0.5, label=attack, color=shade)

plt.legend()
plt.show()


# The same chart restricted to incidents whose attack type is "Unknown".
plt.figure(figsize=(20,10))
plt.title('Trends of Terrorist Attacks by Type Over Time')
plt.xlabel('Year')
plt.ylabel('Number of Attacks')
plt.xticks([2000, 2005, 2010, 2015, 2020])

type_data = df[df['attacktype1_txt'] == "Unknown"]
freq = type_data.iyear
plt.hist(freq, bins=year_edges, alpha=0.5, label="Unknown", color="olive")

plt.legend()
plt.show()


# Columns included in the pairwise correlation matrix.
# NOTE(review): several of these look like categorical codes (country, region,
# attacktype1, ...) and some numeric fields may use negative sentinel values
# for "unknown" - confirm against the dataset codebook before reading much
# into the coefficients.
corr_columns = [
    'country',
    'region',
    'attacktype1',
    'weaptype1',
    'targsubtype1',
    'nkill',
    'nkillter',
    'propextent',
    'nhostkid',
    'nhostkidus',
    'nhours',
    'ransomamt',
    'ransompaid',
    'nreleased',
    'nwound',
    'propvalue',
    'nperps',
]
corr_df = df[corr_columns]

# Annotated heatmap of the correlation matrix of the selected columns.
plt.figure(figsize=(15,15))
correlation_matrix = corr_df.corr()
sns.heatmap(correlation_matrix, annot=True)
plt.show()


# Linear trend of South Asian fatalities against the calendar year.
# `x` is the year as a single-column 2-D array, as scikit-learn expects;
# `y` is the matching list of yearly fatality totals.
x = np.asarray(years).reshape(-1, 1)
y = south_asia_kills
regr = LinearRegression()
regr.fit(x, y)

# Scatter of the yearly totals with the fitted regression line on top.
plt.figure(figsize=(15,10))
plt.scatter(x, y,color='g')
plt.plot(x, regr.predict(x),color='k')
plt.title("South Asian Terrorism Fatalities since 2000")
plt.xlabel("Year")
plt.ylabel("Fatalities")
plt.xticks([2000, 2005, 2010, 2015, 2020])
plt.show()


# Same model fitted with statsmodels OLS to get the full summary table.
# add_constant adds the intercept column to the design matrix.
X2 = sm.add_constant(x)
est = sm.OLS(y, X2)
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.905
Model:                            OLS   Adj. R-squared:                  0.900
Method:                 Least Squares   F-statistic:                     172.1
Date:                Mon, 17 May 2021   Prob (F-statistic):           1.19e-10
Time:                        14:55:05   Log-Likelihood:                -165.60
No. Observations:                  20   AIC:                             335.2
Df Residuals:                      18   BIC:                             337.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -1.024e+06   7.84e+04    -13.059      0.000   -1.19e+06   -8.59e+05
x1           511.7195     39.004     13.120      0.000     429.774     593.665
==============================================================================
Omnibus:                        0.488   Durbin-Watson:                   1.355
Prob(Omnibus):                  0.784   Jarque-Bera (JB):                0.577
Skew:                           0.139   Prob(JB):                        0.749
Kurtosis:                       2.215   Cond. No.                     7.00e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large,  7e+05. This might indicate that there are
strong multicollinearity or other numerical problems.


# Residuals of the year -> fatalities regression on a held-out split.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Hand the visualizer a fresh, unfitted estimator. `regr` has already been
# fitted on every point (test rows included), and Yellowbrick with its default
# is_fitted='auto' does not refit an estimator that is already fitted, so the
# held-out score would have been computed by a model that had seen the test
# data. A new estimator is fitted on X_train / y_train only and leaves `regr`
# untouched.
visualizer = ResidualsPlot(LinearRegression(), size=(1000, 600))
visualizer.fit(X_train, y_train)  # fit on the training split and draw its residuals
visualizer.score(X_test, y_test)  # score the held-out split and draw its residuals
visualizer.show()

<matplotlib.axes._subplots.AxesSubplot at 0x7f80932dd160>

	eventid	iyear	imonth	iday	approxdate	resolution	country	country_txt	region	...	addnotes	scite1	scite2	scite3	dbsource	INT_LOG	INT_IDEO	INT_MISC	INT_ANY	related
0	197000000001	1970	7	2	NaN	NaN	58	Dominican Republic	2	...	NaN	NaN	NaN	NaN	PGIS	0	0	0	0	NaN
1	197000000002	1970	0	0	NaN	NaN	130	Mexico	1	...	NaN	NaN	NaN	NaN	PGIS	0	1	1	1	NaN
2	197001000001	1970	1	0	NaN	NaN	160	Philippines	5	...	NaN	NaN	NaN	NaN	PGIS	-9	-9	1	1	NaN
3	197001000002	1970	1	0	NaN	NaN	78	Greece	8	...	NaN	NaN	NaN	NaN	PGIS	-9	-9	1	1	NaN
4	197001000003	1970	1	0	NaN	NaN	101	Japan	4	...	NaN	NaN	NaN	NaN	PGIS	-9	-9	1	1	NaN

	eventid	iyear	imonth	iday	approxdate	extended	resolution	country	country_txt	region	...	addnotes	scite1	scite2	scite3	dbsource	INT_LOG	INT_IDEO	INT_MISC	INT_ANY	related
69832	200001010001	2000	1	1	NaN	0	NaN	139	Namibia	11	...	NaN	“Namibia: UNITA Rebels Reportedly Abduct 20 Vi...	“Namibia: 4 Injured in Shootout; UNITA 'Bandit...	“Abducted Namibians Reportedly Still Held by U...	CETIS	1	1	0	1	200001010001, 200001010002
69833	200001010002	2000	1	1	NaN	1	NaN	139	Namibia	11	...	NaN	“Namibia: UNITA Rebels Reportedly Abduct 20 Vi...	“Namibia: 4 Injured in Shootout; UNITA 'Bandit...	“Abducted Namibians Reportedly Still Held by U...	CETIS	1	1	0	1	200001010001, 200001010002
69834	200001010003	2000	1	1	NaN	0	NaN	92	India	6	...	NaN	“Lashkar 'Suicide' Squad Attacks Army Camp in ...	NaN	NaN	CETIS	1	1	0	1	NaN
69835	200001010004	2000	1	1	NaN	0	NaN	1003	Kosovo	9	...	NaN	“Kosovo: Romany Home Attacked, 1 Person Injure...	NaN	NaN	CETIS	-9	-9	1	1	NaN
69836	200001010005	2000	1	1	NaN	0	NaN	182	Somalia	11	...	NaN	“Somalia: 'Over 6' Killed in Mogadishu Attack,...	NaN	NaN	CETIS	-9	-9	0	-9	NaN

An Analysis of Global Terrorism Since 2000

Final Tutorial - CMSC 320

Authors: Glen Joy, Dane Dixon, Arushi Tayal (c) 2021

1. Data Collection, Curation, Parsing

2. Exploratory Data Analysis

2.1 What types of attacks are occurring?

2.2 Who are the attackers?

2.3 Where are attacks occurring?

2.4 How is terrorism changing over time?

2.5 Correlations for Possible Hypothesis Testing and Machine Learning

3. Hypothesis Testing and Machine Learning

4. Communication, Insight, and Application of Criminological Theory

4.1 Communication of Approach

4.2 Discussion of Findings

5. Resources & References