X 轴乱序 seaborn histplot [英] X-axis out of order seaborn histplot
问题描述
我正在尝试创建一个 seaborn histplot,已经基本完成,但我注意到 x 轴的顺序是乱的.
按照此处所示添加 order 参数会产生以下错误:
或者,sns.histplot()
可以用对数 x 轴显示,使条宽更相等,同时保持数字轴.在这种情况下,可以根据值的对数计算 kde.
from scipy.stats import gaussian_kde

# Histogram over explicit, unequal bins, shown on a logarithmic x-axis.
bins = [0, 100, 500, 2000, 5000, 10000, 20000, 40000, 100000]
fig, ax = plt.subplots(figsize=(12, 4))
sns.histplot(costs, bins=bins, stat='probability', ec='black', lw=1, ax=ax)
# The kde is fitted on, and evaluated at, the logs of the values.
xs = np.logspace(2, np.log10(bins[-1]), 500)
kde = gaussian_kde(np.log(costs))
ax.plot(xs, kde(np.log(xs)), color='crimson')
ax.set_xscale('log')
ax.set_xticks(bins[1:-1])
ax.set_xticks([], minor=True)
ax.xaxis.set_major_formatter(ScalarFormatter())
ax.yaxis.set_major_formatter(PercentFormatter(1))
I am trying to create a seaborn histplot and am almost done; however, I noticed that my x-axis is out of order.
original_data = {0.0: 29076, 227.92: 26401, 473.51: 12045, 195.98: 7500, 495.0: 3750, 53.83: 3750, 385.0: 3750, 97.08: 3750, 119.39: 3750, 118.61: 3750, 30.0: 3750, 13000.0: 3750, 553.22: 3750, 1420.31: 3750, 1683.03: 3750, 1360.48: 3750, 1361.16: 3750, 1486.66: 3750, 1398.5: 3750, 4324.44: 3750, 4500.0: 3750, 1215.51: 3750, 1461.27: 3750, 772.5: 3750, 3330.0: 3750, 915.75: 3750, 2403.1225: 3750, 1119.5: 3750, 2658.13: 3618, 492.0: 1818, 10000.0: 1809, 0.515: 1809, 118.305: 1809, 215.0: 1809, 513.0: 1809, 237.5: 1809, 15452.5: 1809, 377838.0: 1809, 584983.0: 1809, 10772.61: 1809, 883.87: 1809, 110494.0: 1809, 2727.0: 1809, 1767.0: 1809, 4792.5: 1809, 6646.5: 1809, 7323.75: 1809, 4399.5: 1809, 2737.5: 1809, 9088.5: 1809, 6405.0: 1809, 0.36: 1809, 112.055: 1809, 247.5: 1809, 232.5: 1809, 18000.0: 1809, 38315.0: 1809, 8100.0: 1809, 63115.34: 1809, 27551.0: 1809, 6398.58: 1809, 78.0: 1809, 26.0: 1809, 1413.0: 1809, 2230.5: 1809, 604.5: 1809, 4037.25: 1809, 18507.0: 1809, 732.75: 1809, 22665.0: 1809, 12212.25: 1809, 17833.5: 1809, 4177.5: 1809, 1521.0: 1809, 2307.0: 1809, 1873.5: 1809, 1948.5: 1809, 1182.0: 1809, 1473.0: 1695}
import pandas as pd, numpy as np, seaborn as sns, matplotlib.pyplot as plt
from collections import Counter
# Load the evaluation costs and tally how often each distinct cost occurs.
df = pd.read_csv('data.csv')
costs = df['evals'].to_numpy()
original_data = Counter(df['evals'].to_numpy())

# Translate every cost into the label of the price bracket it falls in.
# The labels are raw strings so the backslash is kept verbatim: "\$" is
# matplotlib's escape for a literal dollar sign (an unescaped pair of "$"
# would be rendered as mathtext).
new = []
for c in costs:
    if 0 <= c < 100:
        new.append(r'<\$100')
    elif 100 <= c < 500:
        new.append(r'<\$500 and >= \$100')
    elif 500 <= c < 2000:
        new.append(r'<\$2000 and >= \$500')
    elif 2000 <= c < 5000:
        new.append(r'<\$5000 and >= \$2000')
    elif 5000 <= c < 10000:
        new.append(r'<\$10000 and >= \$5000')
    elif 10000 <= c < 20000:
        new.append(r'<\$20000 and >= \$10000')
    elif 20000 <= c < 40000:
        new.append(r'<\$40000 and >= \$20000')
    else:
        # Anything that matched no bracket above (40000 and up, but also
        # negative or NaN values) lands here.
        new.append(r'>= \$40000')

# Desired left-to-right order of the brackets on the x-axis, one entry per
# label produced above. It is not passed to histplot below.
order = [
    r'<\$100',
    r'<\$500 and >= \$100',
    r'<\$2000 and >= \$500',
    r'<\$5000 and >= \$2000',
    r'<\$10000 and >= \$5000',
    r'<\$20000 and >= \$10000',
    r'<\$40000 and >= \$20000',
    r'>= \$40000',
]

plt.figure(figsize=(20, 8))
sns.set_style("darkgrid")
sns.histplot(data=new, stat='probability', kde=True)
plt.show()
Displays:
Adding the order argument, as shown here, produces the following error:
Traceback (most recent call last):
File "c:\Users\wundermahn\eval_plots.py", line 28, in <module>
sns.histplot(data=new, stat='probability', kde=True, order=order)
File "C:\Python367-64\lib\site-packages\seaborn\distributions.py", line 1435, in histplot
**kwargs,
File "C:\Python367-64\lib\site-packages\seaborn\distributions.py", line 508, in plot_univariate_histogram
scout = self.ax.fill_between([], [], color=color, **plot_kws)
File "C:\Python367-64\lib\site-packages\matplotlib\__init__.py", line 1565, in inner
return func(ax, *map(sanitize_sequence, args), **kwargs)
File "C:\Python367-64\lib\site-packages\matplotlib\axes\_axes.py", line 5229, in fill_between
collection = mcoll.PolyCollection(polys, **kwargs)
File "C:\Python367-64\lib\site-packages\matplotlib\collections.py", line 1072, in __init__
Collection.__init__(self, **kwargs)
File "C:\Python367-64\lib\site-packages\matplotlib\collections.py", line 164, in __init__
self.update(kwargs)
File "C:\Python367-64\lib\site-packages\matplotlib\artist.py", line 1006, in update
ret = [_update_property(self, k, v) for k, v in props.items()]
File "C:\Python367-64\lib\site-packages\matplotlib\artist.py", line 1006, in <listcomp>
ret = [_update_property(self, k, v) for k, v in props.items()]
File "C:\Python367-64\lib\site-packages\matplotlib\artist.py", line 1002, in _update_property
.format(type(self).__name__, k))
AttributeError: 'PolyCollection' object has no property 'order'
How can I force that order on my x-axis?
You could create a bar plot, using np.histogram to count how many values fall in each bin. The bins need to be set explicitly, as they aren't equally spaced.
Using sns.histplot directly on the costs array would show bars of widely differing widths, which looks quite confusing. Also note that you can't show a kde when the x-axis isn't numeric.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter, ScalarFormatter
original_data = {0.0: 29076, 227.92: 26401, 473.51: 12045, 195.98: 7500, 495.0: 3750, 53.83: 3750, 385.0: 3750, 97.08: 3750, 119.39: 3750, 118.61: 3750, 30.0: 3750, 13000.0: 3750, 553.22: 3750, 1420.31: 3750, 1683.03: 3750, 1360.48: 3750, 1361.16: 3750, 1486.66: 3750, 1398.5: 3750, 4324.44: 3750, 4500.0: 3750, 1215.51: 3750, 1461.27: 3750, 772.5: 3750, 3330.0: 3750, 915.75: 3750, 2403.1225: 3750, 1119.5: 3750, 2658.13: 3618, 492.0: 1818, 10000.0: 1809, 0.515: 1809, 118.305: 1809, 215.0: 1809, 513.0: 1809, 237.5: 1809, 15452.5: 1809, 377838.0: 1809, 584983.0: 1809, 10772.61: 1809, 883.87: 1809, 110494.0: 1809, 2727.0: 1809, 1767.0: 1809, 4792.5: 1809, 6646.5: 1809, 7323.75: 1809, 4399.5: 1809, 2737.5: 1809, 9088.5: 1809, 6405.0: 1809, 0.36: 1809, 112.055: 1809, 247.5: 1809, 232.5: 1809, 18000.0: 1809, 38315.0: 1809, 8100.0: 1809, 63115.34: 1809, 27551.0: 1809, 6398.58: 1809, 78.0: 1809, 26.0: 1809, 1413.0: 1809, 2230.5: 1809, 604.5: 1809, 4037.25: 1809, 18507.0: 1809, 732.75: 1809, 22665.0: 1809, 12212.25: 1809, 17833.5: 1809, 4177.5: 1809, 1521.0: 1809, 2307.0: 1809, 1873.5: 1809, 1948.5: 1809, 1182.0: 1809, 1473.0: 1695}
# NOTE(review): original_data maps cost -> count, so .values() yields the
# counts rather than the costs. Rebuilding the per-sample costs would be
# np.repeat(list(original_data), list(original_data.values())) -- confirm
# which is intended before changing it: the later snippet takes
# np.log(costs), which cannot handle the 0.0 cost.
costs = list(original_data.values())

# The bins are not equally spaced, so every edge is spelled out.
bins = [0, 100, 500, 2000, 5000, 10000, 20000, 40000, 1000000]
bin_values, bin_edges = np.histogram(costs, bins=bins)

# One label per bin. An interior bin [lo, hi) reads "< hi and >= lo"; the
# first bin only names its upper edge and the last bin only its lower edge.
# "\\$" puts a literal backslash before the dollar sign so matplotlib draws
# a plain "$" instead of starting mathtext.
labels = [f'< \\${hi} and\n>= \\${lo}' for lo, hi in zip(bins[1:-2], bins[2:-1])]
labels = [f'< \\${bins[1]}'] + labels + [f'>= \\${bins[-2]}']

fig, ax = plt.subplots(figsize=(12, 4))
# Bar heights are the share of all values that fall in each bin.
sns.barplot(x=labels, y=bin_values / bin_values.sum(), color='dodgerblue', ax=ax)
ax.yaxis.set_major_formatter(PercentFormatter(1))
plt.show()
Alternatively, sns.histplot() could be displayed with a logarithmic x-axis to make the bar widths more equal while maintaining a numeric axis. In that case, a kde could be calculated on the logs of the values.
from scipy.stats import gaussian_kde
# Histogram over explicit, unequal bins, shown on a logarithmic x-axis.
bins = [0, 100, 500, 2000, 5000, 10000, 20000, 40000, 100000]
fig, ax = plt.subplots(figsize=(12, 4))
sns.histplot(costs, bins=bins, stat='probability', ec='black', lw=1, ax=ax)

# The kde is fitted on the logs of the values. Only strictly positive costs
# have a finite logarithm, so zeros, negatives and NaN are left out of the fit.
cost_values = np.asarray(costs, dtype=float)
kde = gaussian_kde(np.log(cost_values[cost_values > 0]))

# Evaluate the kde on points spread evenly in log space, from 100 up to the
# last bin edge.
xs = np.logspace(2, np.log10(bins[-1]), 500)
ax.plot(xs, kde(np.log(xs)), color='crimson')

ax.set_xscale('log')
# Major ticks at the interior bin edges only; minor ticks are removed.
ax.set_xticks(bins[1:-1])
ax.set_xticks([], minor=True)
ax.xaxis.set_major_formatter(ScalarFormatter())
ax.yaxis.set_major_formatter(PercentFormatter(1))
plt.show()
这篇关于X 轴乱序 seaborn histplot的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!