Python Forum
Pandas dataframe to join three tables using like condition among them - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: Data Science (https://python-forum.io/forum-44.html)
+--- Thread: Pandas dataframe to join three tables using like condition among them (/thread-22843.html)



Pandas dataframe to join three tables using like condition among them - sandeep_ganga - Nov-29-2019

Hi Forum,

I am very new to python , trying to compare if joins with dataframes happen faster than processing on database.

Consider three tables say,
tablea(dbid,dbname,a1,a2),
tableb(dbid,createtext,tbkind.tvname,b1,b2),
tablec(dbname,tbname,tbkind,c1,c2). 
i have condition --> a.dbid=b.dbid and b.createtext like '%c.dbname.c.tbname%'.
query: select a.dbname,b.tvname,c.tbkind,c.tbname,c.dbname from tablea a,tableb b,tablec c where a.dbid=b.dbid and b.createtext like '%"'||c.dbname||'"."'||c.tbname||'"%';
I have moved the each table data to each dataframes using read_sql_table, now i am struggling to join them all together and select, Any ideas?
at first ,i tried with join tablea, tableb with join and on condition, but how do i do the other condition having like operator stuff.


putting all together:

database testing;
create table tablea(dbid int,	dbname varchar(30),	x	int,y int);
create table tableb(dbid int,	createtext	varchar(100),tbkind	varchar(10), tvname varchar(10));
create table tablec(z	int,dbname	varchar(30),tbname	varchar(30),tbkind varchar(10));

insert into tablea(11,'db1',10,20 );
insert into tablea(22,'db2',30,60 );
insert into tablea(33,'db3',40,70 );
insert into tablea(44,'db4',50,80 );
insert into tablea(55,'db5',90,10 );

insert into tableb(11,'nothing','t','a');
insert into tableb(22,'nothing','t','b');
insert into tableb(33,'create view def as select * from "db4"."tb4";',	'v',	'c');
insert into tableb(44,'create view abc as select * from "db5"."tb5";','v',	'd');
insert into tableb(55,'create view abc as select * from "db4"."tb4";',	'v',	'e');

insert into tablec(1,'db1','tb1','t');
insert into tablec(2,'db2','tb2','t');
insert into tablec(3,'db3','tb3','t');
insert into tablec(4,'db4','tb4','t');
insert into tablec(5,'db5','tb5','t');

select a.dbname,b.tvname,c.tbkind,c.tbname,c.dbname from tablea a,tableb b,tablec c where a.dbid=b.dbid and b.createtext like '%"'||c.dbname||'"."'||c.tbname||'"%';
Output:
tablea dbid dbname x y 11 db1 10 20 22 db2 30 60 33 db3 40 70 44 db4 50 80 55 db5 90 10 tableb dbid createtext tbkind tvname 11 nothing t a 22 nothing t b 33 create view def as select * from "db4"."tb4" v c 44 create view abc as select * from "db5"."tb5" v d 55 create view abc as select * from "db4"."tb4" v e tablec z dbname tbname tbkind 1 db1 tb1 t 2 db2 tb2 t 3 db3 tb3 t 4 db4 tb4 t 5 db5 tb5 t
testing.py
import pandas as pd
import sqlalchemy
import getpass
import pandas as pd 
import subprocess
import sys



from sqlalchemy import create_engine
pw= getpass.getpass(prompt='Password: ')
user = 'testuser'
host = 'devserver'
engine = sqlalchemy.create_engine('sql://'+ user +':' + pw + '@'+ host ,echo=True)


dftablea = pd.read_sql_table('tablea',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None)
blankIndex=[''] * len(dftablea)
dftablea.index=blankIndex
print(dftablea)

dftableb = pd.read_sql_table('tableb',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None)
blankIndex=[''] * len(dftableb)
dftableb.index=blankIndex
print(dftableb)

dftablec = pd.read_sql_table('tablec',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None)
blankIndex=[''] * len(dftablec)
dftablec.index=blankIndex
print(dftablec)

merged_dataframe = pd.merge(dftablea, dftableb, left_on='dbid',right_on='dbid',how='inner')
blankIndex=[''] * len(merged_dataframe)
merged_dataframe.index=blankIndex
print(merged_dataframe)
now not sure on like operator stuff, Any ideas?


Best Regards,
Sandeep

GANGA SANDEEP KUMAR

source looks like:

Output:
tablea dbid dbname x y 11 db1 10 20 22 db2 30 60 33 db3 40 70 44 db4 50 80 55 db5 90 10 tableb dbid createtext tbkind tvname 11 nothing t a 22 nothing t b 33 create view def as select * from "db4"."tb4" v c 44 create view abc as select * from "db5"."tb5" v d 55 create view abc as select * from "db4"."tb4" v e tablec z dbname tbname tbkind 1 db1 tb1 t 2 db2 tb2 t 3 db3 tb3 t 4 db4 tb4 t 5 db5 tb5 t
import pandas as pd
import sqlalchemy
import teradatasql
import getpass
import pandas as pd 
from teradataml import *
import subprocess
import sys


dftablea = pd.read_csv('dftablea.csv', sep=",", index_col=False)
blankIndex=[''] * len(dftablea)
dftablea.index=blankIndex
#make column names to lower
dftablea.columns = dftablea.columns.str.lower()  
print(dftablea)

dftableb = pd.read_csv('dftableb.csv', sep=",", index_col=False)
blankIndex=[''] * len(dftablea)
dftableb.index=blankIndex
#make column names to lower
dftableb.columns = dftableb.columns.str.lower()  
print(dftableb)

dftablec = pd.read_csv('dftablec.csv', sep=",", index_col=False)
blankIndex=[''] * len(dftablec)
dftablec.index=blankIndex
#make column names to lower
dftablec.columns = dftablec.columns.str.lower()  
print(dftablec)


merged_dataframe = pd.merge(dftablea, dftableb, left_on='dbid',right_on='dbid',how='outer')
blankIndex=[''] * len(merged_dataframe)
merged_dataframe.index=blankIndex
print(merged_dataframe)