# === Imports and Setup ===

# --- Library Imports ---
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import math

# --- Warning Filters ---
# Suppress only the FutureWarning whose message starts with the
# "DataFrameGroupBy.apply operated on the grouping columns" text;
# all other warnings are still shown.
import warnings

warnings.filterwarnings(
    action="ignore",
    message="DataFrameGroupBy.apply operated on the grouping columns",
    category=FutureWarning,
)

# --- Visualization Style ---
# Apply the seaborn theme first, then layer the matplotlib overrides on top
# so the theme does not reset them.
sns.set_theme(style="whitegrid")
plt.rcParams["figure.facecolor"] = "white"
plt.rcParams["axes.facecolor"] = "white"
plt.rcParams["figure.dpi"] = 120

# --- Paths and Data Loading ---
DATA_DIR = Path("../data/processed")
df_country = pd.read_csv(DATA_DIR / "countries_clean.csv")
df_agg = pd.read_csv(DATA_DIR / "aggregates_clean.csv")

print(f"[INFO] Countries dataset shape:  {df_country.shape}")
print(f"[INFO] Aggregates dataset shape: {df_agg.shape}")

# --- Derived Variable: Net Migration per 1,000 People ---
# This cell writes back to the same CSV it loads, and the saved file no longer
# contains the raw 'net_migration' column. Branch on the columns actually
# present so that re-running the cell does not fail with a KeyError.
save_path = DATA_DIR / "countries_clean.csv"

if "net_migration" in df_country.columns:
    # First run (raw column still present): derive the rate, drop the raw
    # count, and persist the result.
    df_country["net_migration_per_1000"] = (
        df_country["net_migration"] / df_country["population"] * 1000
    )
    df_country = df_country.drop(columns=["net_migration"])

    print("[INFO] Added 'net_migration_per_1000' and dropped 'net_migration' column.")

    # --- Save Updated Countries Dataset (with per-1000 variable) ---
    df_country.to_csv(save_path, index=False)
    print(f"[INFO] Updated 'countries_clean.csv' saved to: {save_path}")
elif "net_migration_per_1000" in df_country.columns:
    # Re-run: the file on disk already holds the derived column, so there is
    # nothing to compute and nothing new to save.
    print("[INFO] 'net_migration_per_1000' already present; skipping derivation and save.")
else:
    raise KeyError(
        "countries_clean.csv has neither 'net_migration' nor "
        "'net_migration_per_1000'; cannot build the per-1,000 variable."
    )

[INFO] Countries dataset shape:  (5712, 21)
[INFO] Aggregates dataset shape: (1496, 18)
[INFO] Added 'net_migration_per_1000' and dropped 'net_migration' column.
[INFO] Updated 'countries_clean.csv' saved to: ../data/processed/countries_clean.csv

# === Quick Sanity Checks ===

# --- Coverage Summary ---
# "Expected rows" is unique countries x unique years; it matches
# "Actual rows" when every country has exactly one row per year.
n_countries = df_country["Country Name"].nunique()
n_years = df_country["year"].nunique()

print(
    f"[INFO] Unique countries: {n_countries}",
    f"[INFO] Years covered:     {n_years}",
    f"[INFO] Expected rows:     {n_countries * n_years:,}",
    f"[INFO] Actual rows:       {len(df_country):,}",
    sep="\n",
)

# --- Basic Missing Value Check ---
# Per-column NaN counts; only columns with at least one NaN are displayed.
na_cols = df_country.isna().sum()
print("\n[INFO] Columns with any missing values (should be 0 except metadata, if any):")
display(na_cols.loc[na_cols.gt(0)])

# --- Order Income Groups for Consistent Plotting ---
if "IncomeGroup" in df_country.columns:
    order = ["Low income", "Lower middle income", "Upper middle income", "High income"]
    income_dtype = pd.CategoricalDtype(categories=order, ordered=True)
    df_country["IncomeGroup"] = df_country["IncomeGroup"].astype(income_dtype)
    # Any label not listed in `order` became NaN in the cast above, so this
    # filter removes both originally-missing and unrecognised income groups.
    df_country = df_country.loc[df_country["IncomeGroup"].notna()].copy()
    print("[INFO] Ordered 'IncomeGroup' categories and removed missing entries.")

[INFO] Unique countries: 168
[INFO] Years covered:     34
[INFO] Expected rows:     5,712
[INFO] Actual rows:       5,712

[INFO] Columns with any missing values (should be 0 except metadata, if any):

Series([], dtype: int64)

[INFO] Ordered 'IncomeGroup' categories and removed missing entries.

# === Distribution of Key Socio-Economic and Demographic Indicators ===

# --- Numeric Columns of Interest (kept only if present in the data) ---
numeric_cols = [
    name
    for name in (
        "pop_density", "mobile_subs", "exports_gdp", "imports_gdp",
        "gdp_growth", "gdp_per_capita", "under5_mortality", "unemployment",
        "adol_fertility", "life_expectancy", "fertility_rate", "pop_growth",
        "population", "urban_pop_growth", "hdi", "net_migration_per_1000",
    )
    if name in df_country.columns
]
print(f"[INFO] Plotting distributions for {len(numeric_cols)} numeric indicators.")

# --- Setup Plot Grid ---
n_cols = 4
n_rows = -(-len(numeric_cols) // n_cols)  # ceiling division: rows needed for all panels
plt.figure(figsize=(n_cols * 4.2, n_rows * 3.6))

# --- Plot Histograms ---
# One panel per indicator; subplot positions are 1-based, hence enumerate(..., 1).
for i, col in enumerate(numeric_cols, 1):
    panel = plt.subplot(n_rows, n_cols, i)
    sns.histplot(df_country[col], kde=True, bins=30, color="teal", alpha=0.8, ax=panel)
    panel.set_title(col.replace("_", " ").title(), fontsize=10)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.grid(alpha=0.3, linestyle="--")

# Lay out the panels first, then place the overall title just above them (y > 1).
plt.tight_layout()
plt.suptitle(
    "Distribution of Key Socio-Economic and Demographic Indicators",
    fontsize=15,
    y=1.02,
)
plt.show()

[INFO] Plotting distributions for 16 numeric indicators.

# === Net Migration Distribution: Original vs Capped ===

# --- Create Capped Version for Visualization ---
CAP = 50
# Always rebuild the capped column from the uncapped one. Reusing an existing
# column would leave it clipped at an old threshold if CAP is edited and the
# cell re-run, while every title and label below reports the new CAP.
df_country["net_migration_per_1000_capped"] = df_country["net_migration_per_1000"].clip(-CAP, CAP)
print(f"[INFO] Created capped variable 'net_migration_per_1000_capped' at ±{CAP}.")

# --- Setup Figure ---
fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharey=True)

# --- Original Distribution ---
sns.histplot(df_country["net_migration_per_1000"], bins=40, ax=axes[0], color="steelblue", alpha=0.8)
axes[0].axvline(0, color="red", linestyle="--", lw=1)
axes[0].set_title("Original Distribution")

# --- Capped Distribution ---
sns.histplot(df_country["net_migration_per_1000_capped"], bins=40, ax=axes[1], color="teal", alpha=0.8)
axes[1].axvline(0, color="red", linestyle="--", lw=1)
axes[1].set_title(f"Capped Distribution (±{CAP})")

# --- Common Axes Formatting ---
for ax in axes:
    ax.set_xlabel("Net Migration per 1,000 People")
    ax.set_ylabel("Count")
    ax.grid(alpha=0.3, linestyle="--")

# --- Force Same Y-Scale for Fair Comparison ---
# Pin both panels to start at 0 and end at the taller panel's upper limit.
ymax = max(ax.get_ylim()[1] for ax in axes)
for ax in axes:
    ax.set_ylim(0, ymax)

plt.suptitle("Net Migration per 1,000 — Original vs Capped", fontsize=14, y=1.03)
plt.tight_layout()
plt.show()

# --- Distribution Summary Statistics ---
# Same percentile set for both columns so the two tables line up row by row.
summary_percentiles = [0.01, 0.05, 0.5, 0.95, 0.99]
display(
    df_country["net_migration_per_1000"]
        .describe(percentiles=summary_percentiles)
        .to_frame("Original"),
    df_country["net_migration_per_1000_capped"]
        .describe(percentiles=summary_percentiles)
        .to_frame(f"Capped ±{CAP}"),
)

[INFO] Created capped variable 'net_migration_per_1000_capped' at ±50.

# === Create Crisis Flag ===

# --- Define Binary Crisis Indicator ---
# 1 when the absolute uncapped rate is strictly greater than CAP, else 0.
exceeds_cap = df_country["net_migration_per_1000"].abs().gt(CAP)
df_country["is_crisis"] = exceeds_cap.astype(int)
print(f"[INFO] Created binary crisis flag 'is_crisis' (threshold: ±{CAP}).")

# --- Summarize Crisis Counts by Income Group ---
if "IncomeGroup" in df_country.columns:
    # Summing the 0/1 flag gives the number of flagged rows per group.
    crisis_counts = df_country.groupby("IncomeGroup", observed=True)["is_crisis"].sum()
    crisis_counts = crisis_counts.sort_values(ascending=False)
    print("\n[INFO] Crisis-year counts by income group:")
    display(crisis_counts)

[INFO] Created binary crisis flag 'is_crisis' (threshold: ±50).

[INFO] Crisis-year counts by income group:

IncomeGroup
High income            29
Low income             19
Lower middle income    14
Upper middle income     4
Name: is_crisis, dtype: int64

# === Outlier Countries by Income Group (±50 per 1,000) ===

import math

# --- Define Helper Function ---
def crisis_table_by_income(df, value_col="net_migration_per_1000", cap=50):
    """
    Tabulate crisis-level migration observations per income group and plot
    how many such observations each group has.

    An observation is crisis-level when ``|df[value_col]| > cap`` (strict).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "IncomeGroup", "Country Name", "year" and
        ``value_col``.
    value_col : str
        Column holding the migration rate to test against ``cap``.
    cap : float
        Absolute threshold above which an observation is listed.

    Returns
    -------
    pandas.DataFrame
        One column per income group that has at least one crisis observation.
        Each cell is the string "Country (year, value)" with the value rounded
        to one decimal; rows are ordered by descending absolute value, shorter
        columns are padded with NaN, and the index starts at 1. The frame is
        empty when no observation exceeds the cap.

    Raises
    ------
    KeyError
        If any required column is missing from ``df``.

    Notes
    -----
    The bar chart of counts is drawn as a side effect (``plt.show()``); it is
    not returned. It is skipped when there is nothing to plot.
    """
    required = ("IncomeGroup", "Country Name", "year", value_col)
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"crisis_table_by_income: missing required column(s): {missing}")

    # The threshold test does not depend on the group, so compute it once.
    exceeds_cap = df[value_col].abs() > cap
    groups = df["IncomeGroup"].dropna().unique()
    result, counts = {}, {}

    # --- Identify Crisis Cases by Income Group ---
    for g in groups:
        crisis = df.loc[(df["IncomeGroup"] == g) & exceeds_cap].copy()
        if crisis.empty:
            continue

        counts[g] = len(crisis)
        result[g] = (
            crisis.assign(
                info=lambda x: (
                    x["Country Name"]
                    + " (" + x["year"].astype(str)
                    + ", " + x[value_col].round(1).astype(str)
                    + ")"
                )
            )
            # Largest magnitude first, regardless of sign
            .sort_values(value_col, key=lambda x: x.abs(), ascending=False)["info"]
            .reset_index(drop=True)
        )

    # --- Align Lengths Across Columns ---
    # Reindex every column to the longest one so shorter groups are NaN-padded.
    max_len = max((len(v) for v in result.values()), default=0)
    table = pd.DataFrame({k: v.reindex(range(max_len)) for k, v in result.items()})
    table.index = range(1, len(table) + 1)

    # Nothing exceeded the cap: return the empty table without drawing a chart.
    if not counts:
        print(f"[INFO] No observations exceed ±{cap}; skipping bar chart.")
        return table

    # --- Plot Crisis Counts by Income Group ---
    counts_series = pd.Series(counts).sort_values(ascending=False)
    plt.figure(figsize=(8, 4))
    sns.barplot(x=counts_series.values, y=counts_series.index, color="darkred")
    plt.title(f"Crisis Migration Observations (>|±{cap}| per 1,000) by Income Group")
    plt.xlabel("Count of Crisis Years")
    plt.ylabel("Income Group")
    plt.tight_layout()
    plt.show()

    return table


# --- Generate and Display Crisis Table ---
# Pass the notebook-wide CAP rather than a literal so this table always uses
# the same threshold as the 'is_crisis' flag computed from CAP.
print("[INFO] Generating crisis-level migration summary by income group...")
crisis_table_full = crisis_table_by_income(df_country, cap=CAP)
display(crisis_table_full)

[INFO] Generating crisis-level migration summary by income group...

# === Global Trend Over Time ===

# --- Compute Global Average (Capped) Net Migration per Year ---
# Unweighted mean over all rows of a given year.
global_trend = (
    df_country.groupby("year")[["net_migration_per_1000_capped"]]
    .mean()
    .reset_index()
)

print(f"[INFO] Computed global average migration rates for {len(global_trend)} years.")

# --- Plot Global Trend ---
plt.figure(figsize=(12, 5))
sns.lineplot(data=global_trend, x="year", y="net_migration_per_1000_capped", marker="o", color="black")
plt.axhline(0, color="gray", linestyle="--", lw=1)  # zero line separates net inflow from net outflow

plt.title("Average Global Net Migration per 1,000 People (1990–2023, Capped ±50)", fontsize=13)
plt.xlabel("Year")
plt.ylabel("Net Migration (per 1,000, capped)")

# --- Format X-Axis ---
# Tick only the even years present in the data.
years = sorted(df_country["year"].unique())
plt.xticks(ticks=[yr for yr in years if not yr % 2])

plt.tight_layout()
plt.show()

[INFO] Computed global average migration rates for 34 years.

# === Country-Level Migration Volatility ===

# --- Compute Standard Deviation of Net Migration per Country ---
# Uses the uncapped rate, so extreme years contribute fully to the spread.
per_country_std = df_country.groupby("Country Name")["net_migration_per_1000"].std()
migration_variability = per_country_std.sort_values(ascending=False)

print(f"[INFO] Computed migration volatility for {len(migration_variability)} countries.")

# --- Plot Top 15 Most Volatile Countries ---
plt.figure(figsize=(12, 5))
migration_variability.iloc[:15].plot(kind="bar", color="orange")

plt.title("Top 15 Countries with Most Volatile Net Migration (per 1,000)", fontsize=13)
plt.xlabel("Country")
plt.ylabel("Standard Deviation")
plt.xticks(rotation=45, ha="right")

plt.tight_layout()
plt.show()

[INFO] Computed migration volatility for 168 countries.

# === Top Migration Inflows and Outflows (Average 1990–2023) ===

# --- Compute Average Net Migration (Capped) per Country ---
# Sorted ascending: the most negative averages come first, the most positive last.
avg_migration = df_country.groupby("Country Name")["net_migration_per_1000_capped"].mean()
avg_migration = avg_migration.sort_values()

top_outflows = avg_migration.iloc[:10]
top_inflows = avg_migration.iloc[-10:]

# --- Combine and Display Summary Table ---
top_summary = pd.concat([
    top_outflows.to_frame("Average per 1,000").assign(Type="Outflow"),
    top_inflows.to_frame("Average per 1,000").assign(Type="Inflow"),
])

print("[INFO] Displaying top 10 inflow and outflow countries (1990–2023).")
display(top_summary.round(2))

# --- Plot Top Inflows and Outflows ---
plt.figure(figsize=(12, 7))
plt.barh(y=top_inflows.index, width=top_inflows, color="skyblue", label="Largest Inflows")
plt.barh(y=top_outflows.index, width=top_outflows, color="salmon", label="Largest Outflows")
plt.axvline(0, color="black", linewidth=1)

plt.title(
    "Top 10 Migration Inflows and Outflows\n"
    "(Average Net Migration per 1,000 People, 1990–2023, Capped ±50)",
    fontsize=13,
)
plt.xlabel("Net Migration (per 1,000, capped)")
plt.ylabel("Country")
plt.grid(axis="x", linestyle="--", alpha=0.6)
plt.legend(loc="lower right", frameon=True)

plt.tight_layout()
plt.show()

[INFO] Displaying top 10 inflow and outflow countries (1990–2023).

# === Global vs Regional and Income-Group Migration Trends ===

# --- Ensure Capped Migration Variable Exists ---
# Fallback so this cell can run even if the capped column was never created.
if "net_migration_per_1000_capped" not in df_country.columns:
    df_country["net_migration_per_1000_capped"] = df_country["net_migration_per_1000"].clip(-50, 50)
    print("[INFO] Created capped variable 'net_migration_per_1000_capped' (±50).")

# --- Compute Global Average ---
global_trend = df_country.groupby("year")["net_migration_per_1000_capped"].mean().reset_index()

# --- Compute Regional Averages ---
region_trends = (
    df_country
    .groupby(["Region", "year"], observed=True)["net_migration_per_1000_capped"]
    .mean()
    .reset_index()
)

# --- Compute Income-Group Averages ---
income_trends = (
    df_country
    .groupby(["IncomeGroup", "year"], observed=True)["net_migration_per_1000_capped"]
    .mean()
    .reset_index()
)

print("[INFO] Computed global, regional, and income-group migration trends.")

# --- Plot Global, Regional, and Income-Group Trends ---
plt.figure(figsize=(14, 6))

# Every line shares the same x/y columns; only data and styling differ.
line_axes = {"x": "year", "y": "net_migration_per_1000_capped"}

# Global Average (bold black line)
sns.lineplot(data=global_trend, color="black", linewidth=2.2, label="Global Average", **line_axes)

# Income Groups (one distinct non-solid line style each)
income_styles = {
    "High income": "--",
    "Upper middle income": "-.",
    "Lower middle income": ":",
    "Low income": (0, (3, 1, 1, 1)),
}
for group, style in income_styles.items():
    sub = income_trends.loc[income_trends["IncomeGroup"] == group]
    sns.lineplot(data=sub, linestyle=style, linewidth=1.8, label=group, **line_axes)

# Key Regions (solid lines)
for region in ("Europe & Central Asia", "Sub-Saharan Africa", "Middle East & North Africa"):
    sub = region_trends.loc[region_trends["Region"] == region]
    sns.lineplot(data=sub, linewidth=2.0, label=region, **line_axes)

plt.axhline(0, color="gray", linestyle="--", lw=1)
plt.title("Global, Regional, and Income-Group Migration Trends (1990–2023)", fontsize=13)
plt.xlabel("Year")
plt.ylabel("Net Migration (per 1,000, capped at ±50)")
# Legend is anchored outside the axes, to the right of the plot area.
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left", frameon=True)
plt.grid(axis="x", linestyle="--", alpha=0.6)
plt.xticks(range(1990, 2024, 2))

plt.tight_layout()
plt.show()

[INFO] Computed global, regional, and income-group migration trends.

# === Correlation and Feature Relationships ===

# --- Select Available Numeric Columns ---
numeric_cols = [
    "pop_density", "mobile_subs", "exports_gdp", "imports_gdp",
    "gdp_growth", "gdp_per_capita", "under5_mortality", "unemployment",
    "adol_fertility", "life_expectancy", "fertility_rate", "pop_growth",
    "population", "urban_pop_growth", "hdi", "net_migration_per_1000_capped"
]

available = set(df_country.columns)
corr_cols = [c for c in numeric_cols if c in available]
print(f"[INFO] Using {len(corr_cols)} numeric variables for correlation analysis.")

# --- Compute Correlation Matrix ---
corr = df_country.loc[:, corr_cols].corr()

# --- Plot Correlation Heatmap ---
plt.figure(figsize=(14, 10))
sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    center=0,  # diverging palette centred on zero correlation
    linewidths=0.5,
    cbar_kws={"label": "Pearson Correlation"},
)

plt.title("Correlation Heatmap — Country-Level Indicators", fontsize=13)
plt.tight_layout()

# --- Save Heatmap as Image for Notebook 03 ---
# Save before plt.show() while the figure is still the current one.
output_path = Path("../docs")
output_path.mkdir(parents=True, exist_ok=True)
save_path = output_path / "correlation_heatmap_country_level.png"
plt.savefig(save_path, dpi=300, bbox_inches="tight")
print(f"[INFO] Heatmap saved to: {save_path}")

plt.show()

# --- Identify Top Correlated Indicators with Migration ---
# Take the migration column of the matrix, drop its self-correlation entry,
# and rank the remaining indicators from most positive to most negative.
migration_corr = corr["net_migration_per_1000_capped"].drop("net_migration_per_1000_capped")
corr_table = (
    migration_corr.sort_values(ascending=False)
    .round(3)
    .to_frame("Correlation with Net Migration (per 1,000, capped)")
)

print("[INFO] Top correlated indicators with net migration (capped):")
display(corr_table)

[INFO] Using 16 numeric variables for correlation analysis.
[INFO] Heatmap saved to: ../docs/correlation_heatmap_country_level.png

[INFO] Top correlated indicators with net migration (capped):

# === Country-Level Migration Trends ===

# --- Define Representative Countries ---
countries_show = [
    "Ukraine", "Syrian Arab Republic", "Sudan", "Afghanistan",
    "Turkiye", "Cyprus", "United Arab Emirates", "Qatar", "Germany"
]

print(f"[INFO] Plotting migration trends for {len(countries_show)} representative countries...")

# --- Create Multi-Panel Figure ---
# 3x3 grid with a shared y-axis; flattened so panels can be indexed 0..8.
fig, axes = plt.subplots(3, 3, figsize=(16, 10), sharey=True)
axes = axes.ravel()

for i, country in enumerate(countries_show):
    sub = df_country[df_country["Country Name"] == country]

    # Hide the panel instead of drawing an empty plot when the country has no rows.
    if sub.empty:
        axes[i].set_visible(False)
        continue

    panel = axes[i]
    sns.lineplot(
        data=sub,
        x="year",
        y="net_migration_per_1000_capped",
        marker="o",
        color="steelblue",
        ax=panel,
    )
    panel.axhline(0, color="black", linestyle="--", lw=1)
    panel.set_title(country, fontsize=11)
    panel.set_xlabel("Year")
    panel.set_ylabel("Net Migration (per 1,000, capped)")
    panel.grid(alpha=0.3, linestyle="--")

plt.suptitle("Migration Patterns in Representative Countries (1990–2023)", fontsize=15, y=0.98)
# Reserve the top 5% of the figure for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

[INFO] Plotting migration trends for 9 representative countries...

Author	Golib Sanaev
Project	Forecasting Migration Flows with Machine Learning
Created	2025-09-23
Last Updated	2025-10-14

	Original
count	5712.000000
mean	0.008882
std	17.733863
min	-709.898215
1%	-29.145429
5%	-14.879145
50%	-0.294753
95%	14.405584
99%	44.691864
max	340.852702

	Capped ±50
count	5712.000000
mean	0.012129
std	10.346746
min	-50.000000
1%	-29.145429
5%	-14.879145
50%	-0.294753
95%	14.405584
99%	44.691864
max	50.000000

	Low income	Upper middle income	Lower middle income	High income
1	Rwanda (1994, -305.9)	Ukraine (2022, -138.8)	Timor-Leste (1999, -315.0)	Kuwait (1990, -709.9)
2	Rwanda (1996, 203.9)	Bosnia and Herzegovina (1992, -103.2)	Lebanon (2013, 127.7)	Kuwait (1991, 340.9)
3	Eritrea (1991, -171.2)	Libya (2011, -88.7)	Bhutan (1992, -114.1)	Qatar (2006, 209.9)
4	Burundi (1993, -120.5)	Armenia (1992, -54.9)	Djibouti (1992, -104.2)	Qatar (2007, 203.3)
5	Syrian Arab Republic (2013, -96.1)	NaN	Djibouti (1990, 86.2)	Oman (2011, 160.1)
6	Burundi (1994, 90.4)	NaN	Jordan (2006, 78.2)	United Arab Emirates (2007, 128.4)
7	Afghanistan (1992, 90.2)	NaN	Jordan (2013, 76.3)	Qatar (2008, 116.0)
8	Somalia, Fed. Rep. (1991, -83.4)	NaN	Jordan (1990, 73.5)	Oman (2012, 91.0)
9	Rwanda (1995, 77.5)	NaN	Jordan (2014, 72.3)	Qatar (2014, 90.5)
10	Afghanistan (1993, 76.9)	NaN	Timor-Leste (2001, 60.5)	Qatar (2005, 83.6)
11	Syrian Arab Republic (2014, -71.9)	NaN	Timor-Leste (2000, 54.0)	United Arab Emirates (2008, 83.0)
12	Togo (1993, -69.2)	NaN	Cambodia (1994, 52.9)	Kuwait (2022, 80.3)
13	Eritrea (1995, 67.2)	NaN	Jordan (2015, 51.4)	United Arab Emirates (2006, 79.7)
14	South Sudan (2017, -65.4)	NaN	Cambodia (1995, 50.2)	Qatar (2009, 76.6)
15	Mozambique (1994, 62.4)	NaN	NaN	Qatar (2015, 75.7)
16	South Sudan (2016, -60.4)	NaN	NaN	Oman (2022, 71.7)
17	Sierra Leone (1991, -56.3)	NaN	NaN	Qatar (2013, 69.7)
18	Rwanda (1997, 52.9)	NaN	NaN	United Arab Emirates (1996, 69.4)
19	Afghanistan (2000, -51.0)	NaN	NaN	United Arab Emirates (1997, 65.1)
20	NaN	NaN	NaN	Bahrain (2002, 64.7)
21	NaN	NaN	NaN	United Arab Emirates (1998, 60.3)
22	NaN	NaN	NaN	Bahrain (2003, 59.6)
23	NaN	NaN	NaN	Kuwait (2011, 56.4)
24	NaN	NaN	NaN	Qatar (2010, 56.0)
25	NaN	NaN	NaN	United Arab Emirates (1999, 55.3)
26	NaN	NaN	NaN	Bahrain (2004, 55.1)
27	NaN	NaN	NaN	Bahrain (2001, 52.0)
28	NaN	NaN	NaN	Bahrain (2005, 51.4)
29	NaN	NaN	NaN	United Arab Emirates (2000, 51.1)

	Average per 1,000	Type
Country Name
Tonga	-20.43	Outflow
Samoa	-17.41	Outflow
Moldova	-16.12	Outflow
Albania	-14.14	Outflow
Georgia	-13.71	Outflow
Guyana	-13.42	Outflow
El Salvador	-10.87	Outflow
Fiji	-10.09	Outflow
Armenia	-9.85	Outflow
Eswatini	-7.99	Outflow
Maldives	8.46	Inflow
Cyprus	9.73	Inflow
Singapore	13.16	Inflow
Luxembourg	13.40	Inflow
Saudi Arabia	13.60	Inflow
Equatorial Guinea	14.30	Inflow
Kuwait	16.02	Inflow
Bahrain	17.56	Inflow
Qatar	29.20	Inflow
United Arab Emirates	33.99	Inflow

📊 Forecasting Migration Flows — Notebook 02: Exploratory Data Analysis (EDA)

🎯 Purpose

📑 Table of Contents

⚙️ 1. Setup and Load Clean Data

🔍 2. Quick Sanity Checks

📈 3. Indicator Distributions

🌍 4. Focus on Net Migration (per 1,000 People)

📊 4.1 Distribution: Original vs Capped

⚠️ 4.2 Crisis Flag (for Modeling and EDA)

🚨 4.3 Outlier Countries by Income Group

🌐 5. Global Trend Over Time

📉 6. Country-Level Variation

🚀 7. Top Migration Inflows and Outflows (Average 1990–2023)

🌎 8. Global vs Regional Migration Trends

🔗 9. Correlation and Feature Relationships

🧭 Feature Correlation Insights and Next Steps

🗺️ 10. Country-Level Trends

🧩 11. Key Insights Summary

	Correlation with Net Migration (per 1,000, capped)
pop_growth	0.525
urban_pop_growth	0.417
gdp_per_capita	0.378
exports_gdp	0.268
life_expectancy	0.202
hdi	0.194
mobile_subs	0.171
gdp_growth	0.161
pop_density	0.117
imports_gdp	0.072
population	0.001
fertility_rate	-0.078
under5_mortality	-0.109
unemployment	-0.125
adol_fertility	-0.143