Hi Forum,
I am very new to python , trying to compare if joins with dataframes happen faster than processing on database.
Consider three tables say,
at first ,i tried with join tablea, tableb with join and on condition, but how do i do the other condition having like operator stuff.
putting all together:
Best Regards,
Sandeep
GANGA SANDEEP KUMAR
source looks like:
I am very new to python , trying to compare if joins with dataframes happen faster than processing on database.
Consider three tables say,
tablea(dbid,dbname,a1,a2), tableb(dbid,createtext,tbkind.tvname,b1,b2), tablec(dbname,tbname,tbkind,c1,c2).i have condition --> a.dbid=b.dbid and b.createtext like '%c.dbname.c.tbname%'.
query: select a.dbname,b.tvname,c.tbkind,c.tbname,c.dbname from tablea a,tableb b,tablec c where a.dbid=b.dbid and b.createtext like '%"'||c.dbname||'"."'||c.tbname||'"%';I have moved the each table data to each dataframes using read_sql_table, now i am struggling to join them all together and select, Any ideas?
at first ,i tried with join tablea, tableb with join and on condition, but how do i do the other condition having like operator stuff.
putting all together:
database testing; create table tablea(dbid int, dbname varchar(30), x int,y int); create table tableb(dbid int, createtext varchar(100),tbkind varchar(10), tvname varchar(10)); create table tablec(z int,dbname varchar(30),tbname varchar(30),tbkind varchar(10)); insert into tablea(11,'db1',10,20 ); insert into tablea(22,'db2',30,60 ); insert into tablea(33,'db3',40,70 ); insert into tablea(44,'db4',50,80 ); insert into tablea(55,'db5',90,10 ); insert into tableb(11,'nothing','t','a'); insert into tableb(22,'nothing','t','b'); insert into tableb(33,'create view def as select * from "db4"."tb4";', 'v', 'c'); insert into tableb(44,'create view abc as select * from "db5"."tb5";','v', 'd'); insert into tableb(55,'create view abc as select * from "db4"."tb4";', 'v', 'e'); insert into tablec(1,'db1','tb1','t'); insert into tablec(2,'db2','tb2','t'); insert into tablec(3,'db3','tb3','t'); insert into tablec(4,'db4','tb4','t'); insert into tablec(5,'db5','tb5','t'); select a.dbname,b.tvname,c.tbkind,c.tbname,c.dbname from tablea a,tableb b,tablec c where a.dbid=b.dbid and b.createtext like '%"'||c.dbname||'"."'||c.tbname||'"%';
Output:tablea
dbid dbname x y
11 db1 10 20
22 db2 30 60
33 db3 40 70
44 db4 50 80
55 db5 90 10
tableb
dbid createtext tbkind tvname
11 nothing t a
22 nothing t b
33 create view def as select * from "db4"."tb4" v c
44 create view abc as select * from "db5"."tb5" v d
55 create view abc as select * from "db4"."tb4" v e
tablec
z dbname tbname tbkind
1 db1 tb1 t
2 db2 tb2 t
3 db3 tb3 t
4 db4 tb4 t
5 db5 tb5 t
testing.pyimport pandas as pd import sqlalchemy import getpass import pandas as pd import subprocess import sys from sqlalchemy import create_engine pw= getpass.getpass(prompt='Password: ') user = 'testuser' host = 'devserver' engine = sqlalchemy.create_engine('sql://'+ user +':' + pw + '@'+ host ,echo=True) dftablea = pd.read_sql_table('tablea',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None) blankIndex=[''] * len(dftablea) dftablea.index=blankIndex print(dftablea) dftableb = pd.read_sql_table('tableb',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None) blankIndex=[''] * len(dftableb) dftableb.index=blankIndex print(dftableb) dftablec = pd.read_sql_table('tablec',con=engine,index_col=None, coerce_float=True, columns=None, chunksize=None) blankIndex=[''] * len(dftablec) dftablec.index=blankIndex print(dftablec) merged_dataframe = pd.merge(dftablea, dftableb, left_on='dbid',right_on='dbid',how='inner') blankIndex=[''] * len(merged_dataframe) merged_dataframe.index=blankIndex print(merged_dataframe)now not sure on like operator stuff, Any ideas?
Best Regards,
Sandeep
GANGA SANDEEP KUMAR
source looks like:
Output:tablea
dbid dbname x y
11 db1 10 20
22 db2 30 60
33 db3 40 70
44 db4 50 80
55 db5 90 10
tableb
dbid createtext tbkind tvname
11 nothing t a
22 nothing t b
33 create view def as select * from "db4"."tb4" v c
44 create view abc as select * from "db5"."tb5" v d
55 create view abc as select * from "db4"."tb4" v e
tablec
z dbname tbname tbkind
1 db1 tb1 t
2 db2 tb2 t
3 db3 tb3 t
4 db4 tb4 t
5 db5 tb5 t
import pandas as pd import sqlalchemy import teradatasql import getpass import pandas as pd from teradataml import * import subprocess import sys dftablea = pd.read_csv('dftablea.csv', sep=",", index_col=False) blankIndex=[''] * len(dftablea) dftablea.index=blankIndex #make column names to lower dftablea.columns = dftablea.columns.str.lower() print(dftablea) dftableb = pd.read_csv('dftableb.csv', sep=",", index_col=False) blankIndex=[''] * len(dftablea) dftableb.index=blankIndex #make column names to lower dftableb.columns = dftableb.columns.str.lower() print(dftableb) dftablec = pd.read_csv('dftablec.csv', sep=",", index_col=False) blankIndex=[''] * len(dftablec) dftablec.index=blankIndex #make column names to lower dftablec.columns = dftablec.columns.str.lower() print(dftablec) merged_dataframe = pd.merge(dftablea, dftableb, left_on='dbid',right_on='dbid',how='outer') blankIndex=[''] * len(merged_dataframe) merged_dataframe.index=blankIndex print(merged_dataframe)