# Draw one bar per state.
# NOTE(review): `mean_per_state`, `display`, `HTML` and `base_html` are not
# defined in this snippet; presumably they come from earlier cells -- confirm.
barChart = pygal.Bar(height=400)
# add() is called only for its side effect, so use a plain loop rather than
# a list comprehension that builds a throwaway list.
for state, mean_cases in mean_per_state.items():
    barChart.add(state, mean_cases)
display(HTML(base_html.format(rendered_chart=barChart.render(is_unicode=True))))
# Import needed libraries
import pygal
import pandas as pd

# Parse the dataframe
data = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")

# Get the mean of the 'cases' column for each state
mean_per_state = data.groupby('state')['cases'].mean()

# Draw the bar chart: one series (bar) per state.
# NOTE(review): `display`, `HTML` and `base_html` are not defined in this
# script; presumably they come from an earlier notebook cell -- confirm.
barChart = pygal.Bar(height=400)
# add() is called only for its side effect, so use a plain loop rather than
# a list comprehension that builds a throwaway list.
for state, mean_cases in mean_per_state.items():
    barChart.add(state, mean_cases)
display(HTML(base_html.format(rendered_chart=barChart.render(is_unicode=True))))
# Draw the treemap: one series per state, limited to its first 10 values.
# NOTE(review): `top_10_states`, `display`, `HTML` and `base_html` are not
# defined in this snippet; presumably they come from earlier cells -- confirm.
treemap = pygal.Treemap(height=400)
# add() is called only for its side effect, so use a plain loop rather than
# a list comprehension that builds a throwaway list.
for state, cases in top_10_states.items():
    treemap.add(state, cases[:10])
display(HTML(base_html.format(rendered_chart=treemap.render(is_unicode=True))))
# Import needed libraries
import pygal
import pandas as pd

# Parse the dataframe
data = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")

# Sort rows by case count (descending) and collect each state's values.
# sort=False keeps the groups in order of first appearance in the sorted
# frame, i.e. the state holding the largest 'cases' value comes first.
# With the default sort=True the groups are ordered alphabetically by state
# name, so the slice below would not select the highest-count states.
sort_by_cases = (
    data.sort_values(by=['cases'], ascending=False)
    .groupby(['state'], sort=False)['cases']
    .apply(list)
)

# Get the top 10 states with the highest number of cases
top_10_states = sort_by_cases[:10]

# Draw the treemap: one series per state, limited to its 10 largest values.
# NOTE(review): `display`, `HTML` and `base_html` are not defined in this
# script; presumably they come from an earlier notebook cell -- confirm.
treemap = pygal.Treemap(height=400)
# add() is called only for its side effect, so use a plain loop rather than
# a list comprehension that builds a throwaway list.
for state, cases in top_10_states.items():
    treemap.add(state, cases[:10])
display(HTML(base_html.format(rendered_chart=treemap.render(is_unicode=True))))
# Get the cases by county for all states: for every state, build a list of
# {"value": cases, "label": county} dicts, rows ordered by descending cases.
# NOTE(review): `data` and `pd` are not defined in this snippet; presumably
# they come from earlier cells -- confirm.
# The `axis=0` keyword was dropped: it is the default and is deprecated in
# recent pandas releases.
cases_by_county = (
    data.sort_values(by=['cases'], ascending=False)
    .groupby(['state'])
    .apply(lambda x: [{"value": l, "label": c} for l, c in zip(x['cases'], x['county'])])
)
# Keep only the first 10 states of the grouped result
cases_by_county = cases_by_county[:10]

# Create a new dictionary that contains the cleaned up version of the data:
# one entry per county, with the values of repeated counties summed together.
clean_dict = {}
start_dict = cases_by_county.to_dict()
for state, items in start_dict.items():
    # dicts preserve insertion order, so counties stay in first-seen order
    # without a linear list scan for every item.
    totals = {}
    for item in items:
        label = item['label']
        if label in totals:
            totals[label] += item['value']
        else:
            totals[label] = item['value']
    clean_dict[state] = [{'value': total, 'label': label} for label, total in totals.items()]

# Convert the data to a pandas Series to add it to the treemap
new_series = pd.Series(clean_dict)
然后，我们可以将该序列（Series）添加到树形图（treemap）中，并绘制带标签的版本。
# Draw the labelled treemap: one series per state, limited to 10 counties.
# NOTE(review): `new_series`, `display`, `HTML` and `base_html` are not
# defined in this snippet; presumably they come from earlier cells -- confirm.
treemap = pygal.Treemap(height=200)
# Series.iteritems() was removed in pandas 2.0; items() is the replacement.
# A plain loop is used because add() is called only for its side effect.
for state, counties in new_series.items():
    treemap.add(state, counties[:10])
display(HTML(base_html.format(rendered_chart=treemap.render(is_unicode=True))))
太棒了！现在我们的树形图带有标签了。将鼠标悬停在各个方块上，就可以看到县的名称、所属的州以及该县的病例数。
完整的代码
# Import needed libraries
import pygal
import pandas as pd

# Parse the dataframe
data = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")

# Get the cases by county for all states: for every state, build a list of
# {"value": cases, "label": county} dicts, rows ordered by descending cases.
# The `axis=0` keyword was dropped: it is the default and is deprecated in
# recent pandas releases.
cases_by_county = (
    data.sort_values(by=['cases'], ascending=False)
    .groupby(['state'])
    .apply(lambda x: [{"value": l, "label": c} for l, c in zip(x['cases'], x['county'])])
)
# Keep only the first 10 states of the grouped result
cases_by_county = cases_by_county[:10]

# Create a new dictionary that contains the cleaned up version of the data:
# one entry per county, with the values of repeated counties summed together.
clean_dict = {}
start_dict = cases_by_county.to_dict()
for state, items in start_dict.items():
    # dicts preserve insertion order, so counties stay in first-seen order
    # without a linear list scan for every item.
    totals = {}
    for item in items:
        label = item['label']
        if label in totals:
            totals[label] += item['value']
        else:
            totals[label] = item['value']
    clean_dict[state] = [{'value': total, 'label': label} for label, total in totals.items()]

# Convert the data to a pandas Series to add it to the treemap
new_series = pd.Series(clean_dict)

# Draw the treemap: one series per state, limited to 10 counties.
# NOTE(review): `display`, `HTML` and `base_html` are not defined in this
# script; presumably they come from an earlier notebook cell -- confirm.
treemap = pygal.Treemap(height=200)
# Series.iteritems() was removed in pandas 2.0; items() is the replacement.
# A plain loop is used because add() is called only for its side effect.
for state, counties in new_series.items():
    treemap.add(state, counties[:10])
display(HTML(base_html.format(rendered_chart=treemap.render(is_unicode=True))))
# Take the first 10 (state, cases-list) pairs and add one pie slice group
# per state.
# NOTE(review): `sort_by_cases`, `pi_chart`, `display`, `HTML` and `base_html`
# are not defined in this snippet; presumably they come from earlier cells --
# confirm.
# head(10) limits the Series before iterating, so only ten pairs are built
# instead of materialising every pair and slicing afterwards.
first10 = list(sort_by_cases.head(10).items())
# add() is called only for its side effect, so use a plain loop rather than
# a list comprehension that builds a throwaway list.
for state, cases in first10:
    pi_chart.add(state, cases)
display(HTML(base_html.format(rendered_chart=pi_chart.render(is_unicode=True))))
饼状图的完整代码
# Import needed libraries
import pygal
import pandas as pd

# Parse the dataframe
data = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")

# Sort rows by case count (descending) and collect each state's values.
# sort=False keeps the groups in order of first appearance in the sorted
# frame, i.e. the state holding the largest 'cases' value comes first.
# With the default sort=True the groups are ordered alphabetically by state
# name, so "the top 10 states" below would just be the first ten names.
sort_by_cases = (
    data.sort_values(by=['cases'], ascending=False)
    .groupby(['state'], sort=False)['cases']
    .apply(list)
)

# Draw the pie chart
pi_chart = pygal.Pie(height=400)

# Get the top 10 states; head(10) limits the Series before iterating so
# only ten (state, cases-list) pairs are built.
first10 = list(sort_by_cases.head(10).items())
# add() is called only for its side effect, so use a plain loop rather than
# a list comprehension that builds a throwaway list.
for state, cases in first10:
    pi_chart.add(state, cases)
# NOTE(review): `display`, `HTML` and `base_html` are not defined in this
# script; presumably they come from an earlier notebook cell -- confirm.
display(HTML(base_html.format(rendered_chart=pi_chart.render(is_unicode=True))))