Posts: 2
Threads: 1
Joined: Sep 2020
Hello All,
I'm trying to calculate a nested effect mixed model where both a and b are factors (in R terminology) or class variables (in SAS). The idea is that a random factor, b, is nested in the fixed effect a.
In R I would write:
df$a <- factor(df$a)
df$b <- factor(df$b)
test.model <- lmer(val ~ a+(1|b), data=df)
anova(test.model)
And I get the correct answer.
I know several wrong ways to do this in Python, hopefully someone can point me towards a correct on.
import statsmodels.formula.api as smf
import pandas as pd
Problem_data = {"a":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2],
"b":[1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7,8,8,8,8],
"responce":[3,6,3,3,1,2,2,2,5,6,5,6,2,3,4,3,7,8,7,6,4,5,4,3,7,8,9,8,10,10,9,11]}
df_11_4 = pd.DataFrame(Problem_data,columns=['a','b','responce'])
md = smf.mixedlm("responce ~ C(a)", df_11_4, groups=df_11_4["b"])
mdf = md.fit()
print(mdf.f_test("C(a)[T.2]")) This yields:
F test: F=array([[6.45933505]]), p=0.016442613303300678, df_denom=30, df_num=1
which is wrong--well, I'm sure it is right, but it's not what I'm trying to get.
If I had done it correctly, df_denom would equal 6 and p=0.044.
For the record, this problem is the worked example in "Experimental Design: Procedures for the Behavioral Science 3", Kirk, 1995. p.482. I use this as my "known correct" example.
Posts: 2
Threads: 1
Joined: Sep 2020
I thought I would post an update here for anyone coming later--and maybe someone will see something new that helps.
I have figured out how to get the two variables encoded as type categorical--which I believe to be the equivalent of R's Factor. But I now get an "Unsupported Operand type(s)" error from Patsy.
Here is the code with the full text of the error at the end.
#%%
import statsmodels.formula.api as smf
import pandas as pd
from patsy import dmatrices, dmatrix, demo_data
import statsmodels.regression.mixed_linear_model as smmlm
Problem_data = {"a":[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2],
"b":[1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7,8,8,8,8],
"response":[3,6,3,3,1,2,2,2,5,6,5,6,2,3,4,3,7,8,7,6,4,5,4,3,7,8,9,8,10,10,9,11]}
df_11_4 = pd.DataFrame(Problem_data,columns=['a','b','response'])
#%%
df_11_4["a"] = df_11_4["a"].astype('category')
df_11_4["b"] = df_11_4["b"].astype('category')
print(df_11_4.dtypes)
#%%
# This gives an error
dmatrices("response ~ a+(1|b)", df_11_4)
# PatsyError: Error evaluating factor: TypeError: unsupported operand type(s) for |: 'int' and 'Categorical'
# response ~ a+(1|b)
# ^^^^^
#%%
md = smf.mixedlm("response ~ a+(1|b)", df_11_4, groups=df_11_4["b"])
# PatsyError: Error evaluating factor: TypeError: unsupported operand type(s) for |: 'int' and 'Categorical'
# response ~ a+(1|b)
# ^^^^^
print(dmatrix(df_11_4))
mdf = md.fit()
print(mdf.summary())
print(mdf.f_test("C(a)[T.2]"))
#----------------------------------------------------------
# Full Error message
#----------------------------------------------------------
"""
dmatrices("response ~ a+(1|b)", df_11_4)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
35 try:
---> 36 return f(*args, **kwargs)
37 except Exception as e:
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\eval.py in eval(self, expr, source_name, inner_namespace)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
<string> in <module>
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
64
---> 65 return method(self, other)
66
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\pandas\core\ops\__init__.py in wrapper(self, other)
393
--> 394 res_values = logical_op(lvalues, rvalues, op)
395 return self._construct_result(res_values, name=res_name)
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\pandas\core\ops\array_ops.py in logical_op(left, right, op)
335 # Call the method on lvalues
--> 336 res_values = op(lvalues, rvalues)
337
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\pandas\core\ops\roperator.py in ror_(left, right)
55 def ror_(left, right):
---> 56 return operator.or_(right, left)
57
TypeError: unsupported operand type(s) for |: 'int' and 'Categorical'
The above exception was the direct cause of the following exception:
PatsyError Traceback (most recent call last)
c:\Users\rledu\Documents\_Bioinformatics\python\ANOVA_examples\Forum_Example_2.py in
----> 19 dmatrices("response ~ a+(1|b)", df_11_4)
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\highlevel.py in dmatrices(formula_like, data, eval_env, NA_action, return_type)
308 eval_env = EvalEnvironment.capture(eval_env, reference=1)
309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 310 NA_action, return_type)
311 if lhs.shape[1] == 0:
312 raise PatsyError("model is missing required outcome variables")
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\build.py in design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
694 factor_states,
695 data_iter_maker,
--> 696 NA_action)
697 # Now we need the factor infos, which encapsulate the knowledge of
698 # how to turn any given factor into a chunk of data:
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\build.py in _examine_factor_types(factors, factor_states, data_iter_maker, NA_action)
441 for data in data_iter_maker():
442 for factor in list(examine_needed):
--> 443 value = factor.eval(factor_states[factor], data)
444 if factor in cat_sniffers or guess_categorical(value):
445 if factor not in cat_sniffers:
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\eval.py in eval(self, memorize_state, data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\eval.py in _eval(self, code, memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg, origin, f, *args, **kwargs)
41 origin)
42 # Use 'exec' to hide this syntax from the Python 2 parser:
---> 43 exec("raise new_exc from e")
44 else:
45 # In python 2, we just let the original exception escape -- better
C:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\site-packages\patsy\compat.py in <module>
PatsyError: Error evaluating factor: TypeError: unsupported operand type(s) for |: 'int' and 'Categorical'
response ~ a+(1|b)
^^^^^
"""
|