Nov-15-2016, 03:22 PM
Dear All,
Here is my python script
gene_id
ENSMUSG00000015452.14 Ager
ENSMUSG00000029916.11 Agk
ENSMUSG00000033400.14 Agl
ENSMUSG00000054932.6 Afp
Name: gene_name, dtype: object
Here is the error
Here is my python script
import pandas as pd import numpy as np from matplotlib import pyplot as plt from matplotlib import style style.use('ggplot') import lzma import feather import json from urllib import request exec(request.urlopen('/master/tailseeker/stats.py').read().decode()) exec(request.urlopen('/master/tailseeker/plotutils.py').read().decode()) %cd /home/work/tagcounts/ SAMPLES = 'S1 S2 S3 S4 S5'.split() tagcounts = { sample: pd.read_msgpack(lzma.open(sample + '-single-U-canonical.msgpack.xz', 'rb')) for sample in SAMPLES } genedb=feather.read_dataframe('/home/work/annotations-gene.feather').set_index('gene_id') GENES_TO_PLOT = 'Ager Agk Agl Afp'.split() gene_ids = genedb[genedb['gene_name'].isin(GENES_TO_PLOT)]['gene_name'] gene_ids len(gene_ids) COLORS = colormap_lch(len(gene_ids), end=200, lum=50) COLORS len(COLORS) fig, axes = plt.subplots(len(gene_ids), 1, figsize=(6, 7)) xpts = np.arange(0, 16.1, 0.1) xticks = np.arange(0, 16.1, 2) xticklabels = (xticks ** 2).astype(int) for (gene_id, gene_name), ax in zip(gene_ids.items(), axes): ax.set_title(gene_name) for s, color in zip(SAMPLES, COLORS): tcnt = tagcounts[s][gene_id].sum(axis=1) kde = gaussian_kde(tcnt.index ** 0.5,weights=np.array(tcnt), bw_method=0.1) ax.plot(xpts, kde(xpts), c=color, label=s) ax.set_xticks(xticks) ax.set_xticklabels(xticklabels) ax.set_ylabel('Density') ax.set_xlabel('Poly(A) length') ax.legend(loc='best', fontsize=10) plt.tight_layout()The output of gene_ids is like this
gene_id
ENSMUSG00000015452.14 Ager
ENSMUSG00000029916.11 Agk
ENSMUSG00000033400.14 Agl
ENSMUSG00000054932.6 Afp
Name: gene_name, dtype: object
Here is the error
Error:KeyError Traceback (most recent call last)
<ipython-input-16-b4027e8687df> in <module>()
9
10 for s, color in zip(SAMPLES, COLORS):
---> 11 tcnt = tagcounts[s][gene_id].sum(axis=1)
12 kde = gaussian_kde(tcnt.index ** 0.5,
13 weights=np.array(tcnt), bw_method=0.1)
/usr/local/lib/python3.5/dist-packages/pandas/core/panel.py in __getitem__(self, key)
268 return self._getitem_multilevel(key)
269 if not (is_list_like(key) or isinstance(key, slice)):
--> 270 return super(Panel, self).__getitem__(key)
271 return self.ix[key]
272
/usr/local/lib/python3.5/dist-packages/pandas/core/generic.py in __getitem__(self, item)
1081
1082 def __getitem__(self, item):
-> 1083 return self._get_item_cache(item)
1084
1085 def _get_item_cache(self, item):
/usr/local/lib/python3.5/dist-packages/pandas/core/generic.py in _get_item_cache(self, item)
1088 res = cache.get(item)
1089 if res is None:
-> 1090 values = self._data.get(item)
1091 res = self._box_item_values(item, values)
1092 cache[item] = res
/usr/local/lib/python3.5/dist-packages/pandas/core/internals.py in get(self, item, fastpath)
3100
3101 if not isnull(item):
-> 3102 loc = self.items.get_loc(item)
3103 else:
3104 indexer = np.arange(len(self.items))[isnull(self.items)]
/usr/local/lib/python3.5/dist-packages/pandas/core/index.py in get_loc(self, key, method, tolerance)
1690 raise ValueError('tolerance argument only valid if using pad, '
1691 'backfill or nearest lookups')
-> 1692 return self._engine.get_loc(_values_from_object(key))
1693
1694 indexer = self.get_indexer([key], method=method,
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3979)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3843)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12265)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12216)()
KeyError: 'ENSMUSG00000015452.14'
The URLs in the exec(...) calls were edited because this is my first post; the original URLs were working fine, so there are no issues with them. I downgraded pandas from 0.19.1 to 0.17.0 and the error still persists. All pointers, comments, suggestions and solutions are appreciated. Thanks in advance.