Pandas dataframe to join three tables using like condition among them - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: Data Science (https://python-forum.io/forum-44.html) +--- Thread: Pandas dataframe to join three tables using like condition among them (/thread-22843.html) |
Pandas dataframe to join three tables using like condition among them - sandeep_ganga - Nov-29-2019 Hi Forum, I am very new to python , trying to compare if joins with dataframes happen faster than processing on database. Consider three tables say, tablea(dbid,dbname,a1,a2), tableb(dbid,createtext,tbkind.tvname,b1,b2), tablec(dbname,tbname,tbkind,c1,c2).i have condition --> a.dbid=b.dbid and b.createtext like '%c.dbname.c.tbname%'. query: select a.dbname,b.tvname,c.tbkind,c.tbname,c.dbname from tablea a,tableb b,tablec c where a.dbid=b.dbid and b.createtext like '%"'||c.dbname||'"."'||c.tbname||'"%';I have moved the each table data to each dataframes using read_sql_table, now i am struggling to join them all together and select, Any ideas? at first ,i tried with join tablea, tableb with join and on condition, but how do i do the other condition having like operator stuff. putting all together: database testing; create table tablea(dbid int, dbname varchar(30), x int,y int); create table tableb(dbid int, createtext varchar(100),tbkind varchar(10), tvname varchar(10)); create table tablec(z int,dbname varchar(30),tbname varchar(30),tbkind varchar(10)); insert into tablea(11,'db1',10,20 ); insert into tablea(22,'db2',30,60 ); insert into tablea(33,'db3',40,70 ); insert into tablea(44,'db4',50,80 ); insert into tablea(55,'db5',90,10 ); insert into tableb(11,'nothing','t','a'); insert into tableb(22,'nothing','t','b'); insert into tableb(33,'create view def as select * from "db4"."tb4";', 'v', 'c'); insert into tableb(44,'create view abc as select * from "db5"."tb5";','v', 'd'); insert into tableb(55,'create view abc as select * from "db4"."tb4";', 'v', 'e'); insert into tablec(1,'db1','tb1','t'); insert into tablec(2,'db2','tb2','t'); insert into tablec(3,'db3','tb3','t'); insert into tablec(4,'db4','tb4','t'); insert into tablec(5,'db5','tb5','t'); select a.dbname,b.tvname,c.tbkind,c.tbname,c.dbname from tablea a,tableb b,tablec c where a.dbid=b.dbid and b.createtext like '%"'||c.dbname||'"."'||c.tbname||'"%'; testing.pyimport pandas as pd import sqlalchemy import getpass import pandas as pd import subprocess import sys from sqlalchemy import create_engine pw= getpass.getpass(prompt='Password: ') user = 'testuser' host = 'devserver' engine = sqlalchemy.create_engine('sql://'+ user +':' + pw + '@'+ host ,echo=True) dftablea = pd.read_sql_table('tablea',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None) blankIndex=[''] * len(dftablea) dftablea.index=blankIndex print(dftablea) dftableb = pd.read_sql_table('tableb',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None) blankIndex=[''] * len(dftableb) dftableb.index=blankIndex print(dftableb) dftablec = pd.read_sql_table('tablec',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None) blankIndex=[''] * len(dftablec) dftablec.index=blankIndex print(dftablec) merged_dataframe = pd.merge(dftablea, dftableb, left_on='dbid',right_on='dbid',how='inner') blankIndex=[''] * len(merged_dataframe) merged_dataframe.index=blankIndex print(merged_dataframe)now not sure on like operator stuff, Any ideas? Best Regards, Sandeep GANGA SANDEEP KUMAR source looks like:
import pandas as pd import sqlalchemy import teradatasql import getpass import pandas as pd from teradataml import * import subprocess import sys dftablea = pd.read_csv('dftablea.csv', sep=",", index_col=False) blankIndex=[''] * len(dftablea) dftablea.index=blankIndex #make column names to lower dftablea.columns = dftablea.columns.str.lower() print(dftablea) dftableb = pd.read_csv('dftableb.csv', sep=",", index_col=False) blankIndex=[''] * len(dftablea) dftableb.index=blankIndex #make column names to lower dftableb.columns = dftableb.columns.str.lower() print(dftableb) dftablec = pd.read_csv('dftablec.csv', sep=",", index_col=False) blankIndex=[''] * len(dftablec) dftablec.index=blankIndex #make column names to lower dftablec.columns = dftablec.columns.str.lower() print(dftablec) merged_dataframe = pd.merge(dftablea, dftableb, left_on='dbid',right_on='dbid',how='outer') blankIndex=[''] * len(merged_dataframe) merged_dataframe.index=blankIndex print(merged_dataframe) |