Using a GUI to learn

Data Integration Code to go through

The script below pulls the OneTrust and QuickBase vendor tables out of Postgres, fuzzy-matches vendors by name with Jaccard similarity, flags conflicts, duplicates, and missing vendors, and writes the results back to Postgres.

```python
import psycopg as pg  # https://stackoverflow.com/questions/62688256/sqlalchemy-exc-nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectspostgre
import psycopg2 as pg2  # sqlalchemy doesn't have support for psycopg3 yet
import pandas as pd
import numpy as np
import os
from sqlalchemy import create_engine
from urllib.parse import urlparse
from tqdm import tqdm
from jaccard_index.jaccard import jaccard_index

'''
CONFIGURATION SECTION
__________________________________________
'''
pk_ot = 'id_ot'
pk_qb = 'id_qb'
'''
------------------------------------------
'''

# ACCESSING ENV VARIABLES - https://dev.to/vulcanwm/environment-variables-in-heroku-python-385o
# PANDAS to POSTGRES - https://towardsdatascience.com/upload-your-pandas-dataframe-to-your-database-10x-faster-eb6dc6609ddf
# https://www.geeksforgeeks.org/how-to-insert-a-pandas-dataframe-to-an-existing-postgresql-table/
# PSYCOPG and PANDAS for database stuff -
# https://www.linkedin.com/pulse/how-create-pandas-data-frame-postgresql-psycopg-vitor-spadotto/

# Getting Environment Variables in Heroku
# DATABASE_URL = os.getenv("DATABASE_URL")
# OT_TABLE_NAME = os.getenv("OT_TABLE_NAME")
# QB_TABLE_NAME = os.getenv("QB_TABLE_NAME")

# Connect to POSTGRES database TODO CONFIG
_DATABASE_URL = "POSTGRES URL HERE"
# sqlalchemy only works if the URL uses the 'postgresql://' scheme (Heroku hands out 'postgres://')
DATABASE_URL = _DATABASE_URL.replace("postgres://", "postgresql://", 1)

# TABLE NAMES TODO CONFIG
OT_TABLE_NAME = "onetrust_vendors"
QB_TABLE_NAME = "quickbase_vendors"
OT_TO_QB_TABLE_NAME = "ot_to_qb_vendors"
VENDOR_MAPPING_TABLE = "mapped_vendor_data"

# Parse out the postgres URL to get each variable
# DB_URL = urlparse(DATABASE_URL)
# username = DB_URL.username
# password = DB_URL.password
# database = DB_URL.path[1:]
# hostname = DB_URL.hostname
# port = DB_URL.port
# conn_dict = pg.conninfo.conninfo_to_dict(DATABASE_URL)
# with pg.connect(**conn_dict) as conn:

alchemy_engine = create_engine(DATABASE_URL)
alchemy_conn = alchemy_engine.connect()
conn = pg.connect(DATABASE_URL)
conn.autocommit = True
cur = conn.cursor()

# Collect Data from Tables and store in Pandas dataframes
try:
    OT_query = '''SELECT * FROM ''' + OT_TABLE_NAME
    cur.execute(OT_query)
    ot_data = cur.fetchall()
    ot_cols = []
    for elt in cur.description:
        ot_cols.append(elt[0])
    ot_df = pd.DataFrame(data=ot_data, columns=ot_cols)
except Exception:
    # Table doesn't exist yet - create it empty and keep an empty frame so later steps don't hit a NameError
    empty_df = pd.DataFrame()
    empty_df.to_sql(OT_TABLE_NAME, con=alchemy_conn, if_exists='replace', index=False)
    ot_df = empty_df

try:
    QB_query = '''SELECT * FROM ''' + QB_TABLE_NAME
    cur.execute(QB_query)
    qb_data = cur.fetchall()
    qb_cols = []
    for elt in cur.description:
        qb_cols.append(elt[0])
    qb_df = pd.DataFrame(data=qb_data, columns=qb_cols)
except Exception:
    empty_df = pd.DataFrame()
    empty_df.to_sql(QB_TABLE_NAME, con=alchemy_conn, if_exists='replace', index=False)
    qb_df = empty_df

try:
    VENDOR_MAPPING_query = '''SELECT * FROM ''' + VENDOR_MAPPING_TABLE
    cur.execute(VENDOR_MAPPING_query)
    old_vendor_mapping_data = cur.fetchall()
    old_vendor_mapping_cols = []
    for elt in cur.description:
        old_vendor_mapping_cols.append(elt[0])
    old_vendor_mapping_df = pd.DataFrame(data=old_vendor_mapping_data, columns=old_vendor_mapping_cols)
except Exception:
    old_vendor_mapping_df = pd.DataFrame()
    old_vendor_mapping_df.to_sql(VENDOR_MAPPING_TABLE, con=alchemy_conn, if_exists='replace', index=False)

OT_TO_QB_query = '''SELECT * FROM ''' + OT_TO_QB_TABLE_NAME
cur.execute(OT_TO_QB_query)
ot_to_qb_data = cur.fetchall()
ot_to_qb_cols = []
for elt in cur.description:
    ot_to_qb_cols.append(elt[0])
ot_to_qb_df = pd.DataFrame(data=ot_to_qb_data, columns=ot_to_qb_cols)
orig_ot_to_qb_df = ot_to_qb_df
print("[+] Loaded PostgreSQL tables...")

# Data Deduplication
# Drop rows where there are problematic nulls from backend changes in Jitsu or in OT or QB
# Using drop() to delete rows based on column value
try:
    ot_df.drop(ot_df[ot_df['status'] == ''].index, inplace=True)
    print("[+] Dropped empty 'status' rows from OT vendors table")
    ot_df.drop(ot_df[ot_df['name'] == ''].index, inplace=True)
    print("[+] Dropped empty 'name' rows from OT vendors table")
    qb_df.drop(qb_df[qb_df['status'] == ''].index, inplace=True)
    print("[+] Dropped empty 'status' rows from QB vendors table")
    qb_df.drop(qb_df[qb_df['name'] == ''].index, inplace=True)
    print("[+] Dropped empty 'name' rows from QB vendors table")
except Exception:
    pass

# Rename input tables from ot and qb
# This was only used because the name column was different at one point
# try:
#     # ot_df.rename(columns={'status': 'ot_status', 'name': 'name_ot'}, inplace=True)
#     ot_df['name_ot'] = np.where((ot_df.name_ot == '') & (ot_df.name != ''), ot_df['name'], ot_df['name_ot'])
# except:
#     pass

# Make sure name columns from OT and QB are changed to name_ot and name_qb and also change status column names
# TODO CONFIG
try:
    ot_df.rename(columns={'name': 'name_ot', 'status': 'status_ot'}, inplace=True)
    print("[+] Changed OT 'name' column to 'name_ot'")
    qb_df.rename(columns={'name': 'name_qb', 'status': 'status_qb'}, inplace=True)
    print("[+] Changed QB 'name' column to 'name_qb'")
except Exception:
    pass

print("[+] Deduplicating tables...")
ot_df.drop_duplicates(subset=pk_ot, keep='first', inplace=True)
qb_df.drop_duplicates(subset=pk_qb, keep='first', inplace=True)
print("[+] Deduplication Finished...")

# Clean OT Data
try:
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    ot_df.drop(ot_df[ot_df['name_ot'] == ''].index, inplace=True)
    ot_df = ot_df.dropna(axis=0, how='any', subset=['name_ot', 'id_ot'])
    ot_df.sort_values(by=['id_ot'], inplace=True)
    print("[+] Cleaned OT Data")
except Exception:
    pass

# Clean QB Data (this block previously operated on ot_df by mistake, which made it a silent no-op)
try:
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    qb_df.drop(qb_df[qb_df['name_qb'] == ''].index, inplace=True)
    qb_df = qb_df.dropna(axis=0, how='any', subset=['name_qb', 'id_qb'])
    qb_df.sort_values(by=['id_qb'], inplace=True)
    print("[+] Cleaned QB Data")
except Exception:
    pass

# Data Integration and Schema Matching
# ------------------------------------------
# URL Links
# https://pbpython.com/record-linking.html
# https://www.educative.io/answers/three-ways-to-combine-dataframes-in-pandas
# https://github.com/J535D165/recordlinkage
# https://github.com/J535D165/data-matching-software
# https://github.com/AI-team-UoA/pyJedAI
# https://github.com/delftdata/valentine
# Schema Matching (hold off on this - manually for now)
# For now, a custom mapping between tables is fine
# QB (left) and OT (right)
# vendor_schema_map = {
#
# }
# ------------------------------------------

# Cross Join Dataframes to get every combination
# (dummy 'JOIN' column trick; pandas >= 1.2 could use pd.merge(ot_df, qb_df, how='cross') instead)
print("[+] Cross-Joining OT and QB based on mapped table ('ot_to_qb')")
ot_df['JOIN'] = True
qb_df['JOIN'] = True
merged_df = pd.merge(ot_df, qb_df, on='JOIN')
merged_df.drop('JOIN', axis=1, inplace=True)
ot_df.drop('JOIN', axis=1, inplace=True)
qb_df.drop('JOIN', axis=1, inplace=True)
# rename merged columns
merged_df.rename(columns={'id_x': pk_ot, 'id_y': pk_qb}, inplace=True)
# print(":::::::MERGED DF::::::")
# merged_df.head(5)
# merged_df.info()
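
# Illustrative sketch (not used by the pipeline): the fuzzy join below relies on jaccard_index()
# from the jaccard_index package. How that package tokenizes the two names is an assumption here;
# the general idea is the Jaccard formula |A ∩ B| / |A ∪ B| over the names' tokens, e.g. on word sets:
def _jaccard_words_sketch(left_name, right_name):
    """Hypothetical helper showing the Jaccard formula on lower-cased, whitespace-separated word sets."""
    left_words = set(left_name.lower().split())
    right_words = set(right_name.lower().split())
    if not left_words or not right_words:
        return 0.0
    return len(left_words & right_words) / len(left_words | right_words)
# e.g. _jaccard_words_sketch("Acme Corp", "ACME Corporation") == 1/3 (one shared word out of three distinct words)
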
# Using Jaccard for Similarity-based joining with Pandas
similarity_threshold = 0.6  # CONFIGURE (threshold for similarity - rows below this are deleted)
left_column = 'name_ot'  # CONFIGURE (field to compute similarities for)
right_column = 'name_qb'  # CONFIGURE (field to compute similarities for)
sim_column_name = "similarity"  # name for the similarity column

# init progress indicator
tqdm_desc = "Fuzzy Joining on - " + left_column + " <-- compared to --> " + right_column
tqdm.pandas(desc=tqdm_desc)
print("MERGED DF")
merged_df.info()
merged_df[sim_column_name] = merged_df.progress_apply(
    lambda x: jaccard_index(x[left_column], x[right_column])
    if jaccard_index(x[left_column], x[right_column]) > similarity_threshold else 0,
    axis=1)

# Filter dataframes based on threshold
df_similar_matches = merged_df[merged_df[sim_column_name] >= similarity_threshold]
df_exact_matches = merged_df[merged_df[sim_column_name] == 1.0]

# Sort Fuzzy-Joined dataframe (assign the result - sort_values is not in-place by default)
df_similar_matches = df_similar_matches.sort_values(by=sim_column_name, ascending=False)
print("[+] Similarity between databases calculated...")
# print("::::::: Fuzzy-Joined Dataframe ::::::")
# print(df_similar_matches.head(5))
# df_similar_matches.info()

# Add similar matches dataframe to mapping dataframe that matches left -> right tables
print("[+] Configuring mapping tables")
left_primary_key = pk_ot  # this is equivalent to the primary key (left is OT)
right_primary_key = pk_qb  # this is equivalent to the primary key (right is QB)
ot_to_qb_columns = [left_primary_key, left_column, right_primary_key, right_column, sim_column_name, "match"]
ot_to_qb_df = ot_to_qb_df[ot_to_qb_columns]  # selects the same columns as from the similar matches
# df_similar_matches['match'] = "?"  # add match column
df_similar_matches_temp = df_similar_matches.assign(match='y')
ot_to_qb_similar_matches_df = df_similar_matches_temp[ot_to_qb_columns]  # columns set up for appending current matches

# Create tables for joining to update mapping
# ot_to_qb_nonmatches = ot_to_qb_df[ot_to_qb_df["match"] == "n"]
# ot_to_qb_matches = ot_to_qb_df[ot_to_qb_df["match"] == "y"]
# ot_to_qb_unknown = ot_to_qb_df[ot_to_qb_df["match"] == "?"]
# ot_to_qb_errors = ot_to_qb_df[~ot_to_qb_df["match"].isin(["n", "y", "?"])]

# Rename columns
ot_df.rename(columns={'id': pk_ot}, inplace=True)
qb_df.rename(columns={'id': pk_qb}, inplace=True)
orig_ot_to_qb_df.rename(columns={'id_x': pk_ot, 'id_y': pk_qb, 'name_x': 'name_ot', 'name_y': 'name_qb'},
                        inplace=True)

# Check for conflicts
print("[+] Checking for conflicts...")
ot_to_qb_conflicts_temp = orig_ot_to_qb_df.merge(how='left', right=ot_to_qb_similar_matches_df,
                                                 left_on=left_primary_key, right_on=left_primary_key)
ot_to_qb_conflicts_temp = ot_to_qb_conflicts_temp[
    ot_to_qb_conflicts_temp['id_qb_x'] == ot_to_qb_conflicts_temp['id_qb_y']]
ot_to_qb_conflicts_temp.rename(columns={'similarity_x': 'similarity_from_db',
                                        'similarity_y': 'similarity_from_python'}, inplace=True)
# ot_to_qb_conflicts_temp['match_y'] = (ot_to_qb_conflicts_temp['match_y']).where(
#     ot_to_qb_conflicts_temp['match_y'] == '',
#     'n')
ot_to_qb_conflicts = ot_to_qb_conflicts_temp[
    ot_to_qb_conflicts_temp['match_x'] != ot_to_qb_conflicts_temp['match_y']]  # CONFIGURE

# Find instances of new vendors - means the mapping file may need to be updated.
ot_to_qb_similar_matches_df = ot_to_qb_similar_matches_df.sort_values('similarity', ascending=False) \
    .drop_duplicates('id_ot').sort_index()
new_to_ot_df = orig_ot_to_qb_df.merge(how='outer', right=ot_to_qb_similar_matches_df,
                                      left_on=left_primary_key, right_on=left_primary_key, indicator=True)
new_to_ot_df = new_to_ot_df.loc[new_to_ot_df['_merge'] == 'right_only']
new_to_ot_df = new_to_ot_df.sort_values('similarity_y', ascending=False) \
    .drop_duplicates('id_ot').sort_index()
# UPDATE TODO add to vendor alerts

# Create Vendor-to-Vendor Mapping between OT and QB
# Left Join OT with known QB matches
print("[+] Left joining OT with known QB matches...")
# cols_to_use = ot_df.columns.difference(orig_ot_to_qb_df.columns)  # get difference of cols between dfs so we
# # don't duplicate cols
# # print(cols_to_use.tolist())
# temp_df = orig_ot_to_qb_df[cols_to_use]
# temp_df.info()
ot_mapped_df = ot_df.merge(how='left', right=orig_ot_to_qb_df, left_on=pk_ot, right_on=pk_ot,
                           # suffixes=('', '_map2ot'),
                           suffixes=('', '_drop'),
                           copy=False)
ot_mapped_df = ot_mapped_df[ot_mapped_df['match'] == 'y']  # Filter to only matches
ot_mapped_df.drop([col for col in ot_mapped_df.columns if 'drop' in col], axis=1, inplace=True)

# Left Join QB with known OT matches
print("[+] Left joining OT matches with respective QB matches...")
# cols_to_use = qb_df.columns.difference(ot_mapped_df.columns)  # get difference of cols between dfs so we don't
# # duplicate cols
# temp_df = orig_ot_to_qb_df[cols_to_use]
result_mapped_df = ot_mapped_df.merge(how='left', right=qb_df, left_on=pk_qb, right_on=pk_qb,
                                      # suffixes=('', '_map2qb'),
                                      suffixes=('', '_drop'),
                                      copy=False)
result_mapped_df = result_mapped_df.dropna(axis=1, how='all')  # get rid of completely null columns
result_mapped_df.drop([col for col in result_mapped_df.columns if 'drop' in col], axis=1, inplace=True)

# Get missing vendors by doing a left anti join
print("[+] Finding missing vendors between two databases..")
ot_vendors_missing_from_mapping = result_mapped_df.merge(how='outer', right=qb_df, left_on=pk_qb, right_on=pk_qb,
                                                         indicator=True)
ot_vendors_missing_from_mapping = ot_vendors_missing_from_mapping.loc[
    ot_vendors_missing_from_mapping['_merge'] == 'right_only']
ot_vendors_missing_from_mapping = ot_vendors_missing_from_mapping.dropna(axis=1, how='all')  # drop all-null columns
qb_vendors_missing_from_mapping = ot_df.merge(how='outer', right=result_mapped_df, left_on=pk_ot, right_on=pk_ot,
                                              indicator=True)
qb_vendors_missing_from_mapping = qb_vendors_missing_from_mapping.loc[
    qb_vendors_missing_from_mapping['_merge'] == 'left_only']
qb_vendors_missing_from_mapping = qb_vendors_missing_from_mapping.dropna(axis=1, how='all')  # drop all-null columns

# Look for any vendors that have relevance or have changed in QB compared to OT data
# COMPARING - result_mapped_df && old_vendor_mapping_df (from Postgres database)
tqdm_desc = "Processing duplicate Vendors"
tqdm.pandas(desc=tqdm_desc)


def vendor_mapping_changed(old_mapping_df, new_mapping_df, columns):
    temp_old_mapping_df = old_mapping_df[columns]
    temp_new_mapping_df = new_mapping_df[columns]
    return not temp_old_mapping_df.equals(temp_new_mapping_df)


def create_vendor_alert(old_id_qb, old_qb_org_value, old_qb_status, new_id_qb, new_qb_org_value, new_qb_status):
    alerts = []
    no_alerts = True
    if old_qb_status != new_qb_status:
        alerts.append("[CHECK] QuickBase Status Changed.")
        no_alerts = False
    if old_qb_org_value != new_qb_org_value:
        alerts.append("[CHECK] QuickBase orgs using vendor changed.")
        no_alerts = False
    if old_id_qb != new_id_qb:
        alerts.append("[CHECK] Vendors changed in QuickBase. Check conflicts table just in case.")
        no_alerts = False
    if no_alerts:
        return ""
    sep = ", "
    return sep.join(alerts)


# Get duplicate entities from each table and compare them to form more vendor alerts
def process_duplicate_vendor(mapping_df, match_column, match_boolean_column, current_match_val, current_status_val,
                             current_id_val, primary_name_column, status_column_name):
    dup_exists = not mapping_df[mapping_df[match_column] == current_id_val].empty
    inactive_statuses = ['Dormant', 'Terminated', 'Closed to Future Business', '']
    if dup_exists:
        temp_dup_df = mapping_df[mapping_df[match_column] == current_id_val]
        dup_vendor_ct = 0
        old_active_vendor_ct = 0
        old_inactive_vendor_ct = 0
        dup_vendors = []
        old_active_vendors = []
        old_inactive_vendors = []
        for ind in temp_dup_df.index:
            temp_match_val = temp_dup_df[match_boolean_column][ind]
            temp_status_val = temp_dup_df[status_column_name][ind]
            # print(":::: ", temp_dup_df[match_column][ind])
            # print(current_match_val, " ::: ", temp_match_val, " ::: ", current_status_val, " ::: ", temp_status_val)
            if current_match_val == 'y' and temp_match_val == 'y':
                dup_vendor_ct += 1
                dup_vendors.append(str(temp_dup_df[primary_name_column][ind]))
            elif current_match_val == 'y' and temp_match_val == 'n' and temp_status_val in inactive_statuses:
                old_inactive_vendor_ct += 1
                old_inactive_vendors.append(str(temp_dup_df[primary_name_column][ind]))
            elif current_match_val == 'y' and temp_match_val == 'n' and temp_status_val not in inactive_statuses:
                old_active_vendor_ct += 1
                old_active_vendors.append(str(temp_dup_df[primary_name_column][ind]))
        alerts = ''
        if dup_vendor_ct > 0:
            # dup_vendors_str = ' << AND >> '.join(map(str, dup_vendors))
            alerts += '[' + str(dup_vendor_ct) + '] Duplicate vendors IN MAPPING FILE (Sort by IDs to find duplicates) '
        if old_inactive_vendor_ct > 0:
            # old_inactive_vendors_str = ' < AND > '.join(map(str, old_inactive_vendors))
            alerts += '[' + str(old_inactive_vendor_ct) + '] old duplicate vendors with NON-Active QB status IN ' \
                      'MAPPING FILE (Sort by IDs to find duplicates)'
        if old_active_vendor_ct > 0:
            # old_active_vendors_str = ' < AND > '.join(map(str, old_active_vendors))
            alerts += '[' + str(old_active_vendor_ct) + '] old duplicate vendors with Active QB status IN MAPPING ' \
                      'FILE (Sort by IDs to find duplicates)'
        return alerts
    return ''  # no duplicates for this vendor


def process_duplicate_vendors(mapping_df, match_boolean_column, status_column, match_column, primary_name_column):
    # for each row of mapping_df create potential alerts
    temp_df = mapping_df.copy()
    temp_df['alert'] = temp_df.progress_apply(lambda x: process_duplicate_vendor(temp_df, match_column,
                                                                                 match_boolean_column,
                                                                                 x[match_boolean_column],
                                                                                 x[status_column],
                                                                                 x[match_column],
                                                                                 primary_name_column,
                                                                                 status_column), axis=1)
    return temp_df


# Create Vendor Alert Dataframe
vendor_alerts_df = result_mapped_df.copy()
vendor_alerts_df = vendor_alerts_df.assign(alert='')

# Shows duplicates in each database where multiple vendors in one db map to a vendor in the other db
# RESOURCE - https://sparkbyexamples.com/pandas/pandas-get-list-of-all-duplicate-rows/
print("[+] Finding duplicate vendors from each table")
# orig_ot_to_qb_df.sort_values(pk_ot, inplace=True)
# duplicates_in_ot_matches_df = orig_ot_to_qb_df[orig_ot_to_qb_df[pk_ot].duplicated()]
# orig_ot_to_qb_df.sort_values(pk_qb, inplace=True)
# duplicates_in_qb_matches_df = orig_ot_to_qb_df[orig_ot_to_qb_df[pk_qb].duplicated()]
# orig_ot_to_qb_df = orig_ot_to_qb_df[orig_ot_to_qb_df['match'] == 'y']
print("[+] Processing duplicate OT vendors...")
duplicates_in_ot_matches_df = orig_ot_to_qb_df[orig_ot_to_qb_df.duplicated(pk_ot, keep=False)]
duplicates_in_ot_matches_df = process_duplicate_vendors(duplicates_in_ot_matches_df, 'match', 'status', pk_ot,
                                                        'name_ot')
duplicates_in_ot_matches_df = duplicates_in_ot_matches_df[duplicates_in_ot_matches_df['alert'] != '']
duplicates_in_ot_matches_df.sort_values(by=['name_ot'], inplace=True)

print("[+] Processing duplicate QB vendors...")
# duplicates_in_ot_matches_df = duplicates_in_ot_matches_df[duplicates_in_ot_matches_df['match'] == 'y']
duplicates_in_qb_matches_df = orig_ot_to_qb_df[orig_ot_to_qb_df.duplicated(pk_qb, keep=False)]
duplicates_in_qb_matches_df.info()
duplicates_in_qb_matches_df = process_duplicate_vendors(duplicates_in_qb_matches_df, 'match', 'status', pk_qb,
                                                        'name_qb')
duplicates_in_qb_matches_df = duplicates_in_qb_matches_df[duplicates_in_qb_matches_df['alert'] != '']
duplicates_in_qb_matches_df.sort_values(by=['name_qb'], inplace=True)
# duplicates_in_ot_matches_df = duplicates_in_ot_matches_df[duplicates_in_ot_matches_df['match'] == 'y']

print("OLD VENDOR MAPPING")
old_vendor_mapping_df.info()
print("NEW VENDOR MAPPING")
result_mapped_df.info()

# Check if vendor data has changed, and if so then get alerts from relevant changes for relevant rows
tqdm_desc = "Creating Vendor Mapping Alerts"
tqdm.pandas(desc=tqdm_desc)
qb_vendor_data_changed = False
if not old_vendor_mapping_df.empty:
    try:
        if vendor_mapping_changed(old_vendor_mapping_df, result_mapped_df, old_vendor_mapping_df.columns.tolist()):
            print("[+] Vendor map changes detected")
            relevant_cols = ['id_qb', 'organization_value', 'status_qb']
            qb_vendor_data_changed = True
            temp_vendor_alerts_df = result_mapped_df.copy()
            # vendor_alerts_df = pd.concat([temp_vendor_alerts_df, old_vendor_mapping_df]).drop_duplicates(
            #     keep=False, subset=relevant_cols)
            old_suffix = '_old'
            new_suffix = '_new'
            # temp_vendor_alerts_df.info()  # error-free mapping of OT to QB vendors - uses the manually inputted map
            # old_vendor_mapping_df.info()  # previous error-free mapping in the database at the time of running
            # This merges the previous and the just-now built mapping of OT to QB vendors
            vendor_alerts_df = old_vendor_mapping_df.merge(temp_vendor_alerts_df, how='left', left_on='id_ot',
                                                           right_on='id_ot', indicator=False, copy=True,
                                                           suffixes=[old_suffix, new_suffix])
            # vendor_alerts_df.info()
            vendor_alerts_df['alert'] = vendor_alerts_df.progress_apply(lambda x: create_vendor_alert(
                x[relevant_cols[0] + old_suffix], x[relevant_cols[1] + old_suffix], x[relevant_cols[2] + old_suffix],
                x[relevant_cols[0] + new_suffix], x[relevant_cols[1] + new_suffix], x[relevant_cols[2] + new_suffix]),
                axis=1)
            vendor_alerts_df = vendor_alerts_df.loc[vendor_alerts_df['alert'] != '']
    except Exception:
        pass

# ----------------------------------------
# Data Export back to Postgres Server (Converting Pandas dataframes to SQL tables)
print("[+] Converting & Uploading pandas dataframes to PostgreSQL...")
qb_df.to_sql('quickbase_vendors', con=alchemy_conn, if_exists='replace', index=False)
ot_df.to_sql('onetrust_vendors', con=alchemy_conn, if_exists='replace', index=False)
ot_to_qb_conflicts.to_sql('false_positives_in_automated_mapping', con=alchemy_conn, if_exists='replace', index=False)
ot_vendors_missing_from_mapping.to_sql('missing_from_ot', con=alchemy_conn, if_exists='replace', index=False)
qb_vendors_missing_from_mapping.to_sql('missing_from_qb', con=alchemy_conn, if_exists='replace', index=False)
duplicates_in_ot_matches_df.to_sql('duplicates_in_ot', con=alchemy_conn, if_exists='replace', index=False)
duplicates_in_qb_matches_df.to_sql('duplicates_in_qb', con=alchemy_conn, if_exists='replace', index=False)
result_mapped_df.to_sql('mapped_vendor_data', con=alchemy_conn, if_exists='replace', index=False)
ot_to_qb_similar_matches_df.to_sql('all_similar_vendors_from_python', con=alchemy_conn, if_exists='replace',
                                   index=False)
new_to_ot_df.to_sql('potential_new_matches_to_ot', con=alchemy_conn, if_exists='replace', index=False)
if qb_vendor_data_changed and not old_vendor_mapping_df.empty:
    vendor_alerts_df.to_sql('vendor_alerts', con=alchemy_conn, if_exists='replace', index=False)
print("[+] Data finished uploading...")

# Test to see if table was populated
# sql_query = '''select * from test_pandas;'''
# cur.execute(sql_query)
# for i in cur.fetchall():
#     print(i)

# Close the SQLAlchemy connection and the raw psycopg connection
alchemy_conn.close()
cur.close()
conn.close()
```
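
The connection string and table names are hard-coded at the top of the script (the `TODO CONFIG` markers), while the commented-out `os.getenv` lines point at the intended Heroku setup. Below is a minimal sketch of that configuration path, assuming the same variable names (`DATABASE_URL`, `OT_TABLE_NAME`, `QB_TABLE_NAME`); the fallback table names are just the defaults the script already uses.

```python
import os

# Pull the Heroku-style settings from the environment instead of hard-coding them.
DATABASE_URL = os.environ["DATABASE_URL"]  # fail fast if the connection string is missing
OT_TABLE_NAME = os.getenv("OT_TABLE_NAME", "onetrust_vendors")
QB_TABLE_NAME = os.getenv("QB_TABLE_NAME", "quickbase_vendors")

# Heroku hands out "postgres://..." URLs, but SQLAlchemy only accepts the "postgresql://" scheme.
if DATABASE_URL.startswith("postgres://"):
    DATABASE_URL = DATABASE_URL.replace("postgres://", "postgresql://", 1)
```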