Lecture Notes 12#

import numpy as np
import matplotlib.pyplot as plt

A more object-oriented approach to plotting

  • work with figure and axes objects

plt.subplots()
(<Figure size 640x480 with 1 Axes>, <Axes: >)
_images/9787f8a2451f2e7e36145ae170b7a0df1c349c31116bdd2985cc12a5f053b850.png
plt.subplots(nrows=2)
(<Figure size 640x480 with 2 Axes>, array([<Axes: >, <Axes: >], dtype=object))
_images/81ffc1b3779cacf48e3df503681fb84c9bf6285dd78f97f018fe3d8dfbbfd3f5.png
plt.subplots(ncols=2)
(<Figure size 640x480 with 2 Axes>, array([<Axes: >, <Axes: >], dtype=object))
_images/8d504a320169143d50d0a51bf494cb28571a80e24b8725b21458c147fa198994.png
fig, ax = plt.subplots()
_images/9787f8a2451f2e7e36145ae170b7a0df1c349c31116bdd2985cc12a5f053b850.png
dir(ax)
['ArtistList',
 '_AxesBase__clear',
 '_PROPERTIES_EXCLUDED_FROM_SET',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_text',
 '_adjustable',
 '_agg_filter',
 '_alias_map',
 '_alpha',
 '_anchor',
 '_animated',
 '_aspect',
 '_autotitlepos',
 '_axes',
 '_axes_locator',
 '_axis_map',
 '_axis_names',
 '_axisbelow',
 '_box_aspect',
 '_callbacks',
 '_check_no_units',
 '_children',
 '_clipon',
 '_clippath',
 '_cm_set',
 '_colorbars',
 '_convert_dx',
 '_current_image',
 '_default_contains',
 '_errorevery_to_mask',
 '_facecolor',
 '_fill_between_x_or_y',
 '_frameon',
 '_fully_clipped_to_axes',
 '_gci',
 '_gen_axes_patch',
 '_gen_axes_spines',
 '_get_aspect_ratio',
 '_get_lines',
 '_get_pan_points',
 '_get_patches_for_fill',
 '_get_view',
 '_gid',
 '_gridOn',
 '_in_layout',
 '_init_axis',
 '_internal_update',
 '_label',
 '_label_outer_xaxis',
 '_label_outer_yaxis',
 '_left_title',
 '_make_twin_axes',
 '_mouseover',
 '_mouseover_set',
 '_navigate',
 '_navigate_mode',
 '_originalPosition',
 '_parse_scatter_color_args',
 '_path_effects',
 '_pcolorargs',
 '_picker',
 '_position',
 '_prepare_view_from_bbox',
 '_process_unit_info',
 '_projection_init',
 '_quiver_units',
 '_rasterization_zorder',
 '_rasterized',
 '_remove_legend',
 '_remove_method',
 '_request_autoscale_view',
 '_right_title',
 '_sci',
 '_set_alpha_for_array',
 '_set_artist_props',
 '_set_gc_clip',
 '_set_lim_and_transforms',
 '_set_position',
 '_set_title_offset_trans',
 '_set_view',
 '_set_view_from_bbox',
 '_shared_axes',
 '_sharex',
 '_sharey',
 '_sketch',
 '_snap',
 '_stale',
 '_stale_viewlims',
 '_sticky_edges',
 '_subclass_uses_cla',
 '_subplotspec',
 '_tight',
 '_transform',
 '_transformSet',
 '_twinned_axes',
 '_unit_change_handler',
 '_unstale_viewLim',
 '_update_image_limits',
 '_update_line_limits',
 '_update_patch_limits',
 '_update_props',
 '_update_set_signature_and_docstring',
 '_update_title_position',
 '_update_transScale',
 '_url',
 '_use_sticky_edges',
 '_validate_converted_limits',
 '_viewLim',
 '_visible',
 '_xaxis_transform',
 '_xmargin',
 '_yaxis_transform',
 '_ymargin',
 'acorr',
 'add_artist',
 'add_callback',
 'add_child_axes',
 'add_collection',
 'add_container',
 'add_image',
 'add_line',
 'add_patch',
 'add_table',
 'angle_spectrum',
 'annotate',
 'apply_aspect',
 'arrow',
 'artists',
 'autoscale',
 'autoscale_view',
 'axes',
 'axhline',
 'axhspan',
 'axis',
 'axison',
 'axline',
 'axvline',
 'axvspan',
 'bar',
 'bar_label',
 'barbs',
 'barh',
 'bbox',
 'boxplot',
 'broken_barh',
 'bxp',
 'callbacks',
 'can_pan',
 'can_zoom',
 'child_axes',
 'cla',
 'clabel',
 'clear',
 'clipbox',
 'cohere',
 'collections',
 'containers',
 'contains',
 'contains_point',
 'contour',
 'contourf',
 'convert_xunits',
 'convert_yunits',
 'csd',
 'dataLim',
 'drag_pan',
 'draw',
 'draw_artist',
 'end_pan',
 'errorbar',
 'eventplot',
 'figure',
 'fill',
 'fill_between',
 'fill_betweenx',
 'findobj',
 'fmt_xdata',
 'fmt_ydata',
 'format_coord',
 'format_cursor_data',
 'format_xdata',
 'format_ydata',
 'get_adjustable',
 'get_agg_filter',
 'get_alpha',
 'get_anchor',
 'get_animated',
 'get_aspect',
 'get_autoscale_on',
 'get_autoscalex_on',
 'get_autoscaley_on',
 'get_axes_locator',
 'get_axisbelow',
 'get_box_aspect',
 'get_children',
 'get_clip_box',
 'get_clip_on',
 'get_clip_path',
 'get_cursor_data',
 'get_data_ratio',
 'get_default_bbox_extra_artists',
 'get_facecolor',
 'get_fc',
 'get_figure',
 'get_frame_on',
 'get_gid',
 'get_gridspec',
 'get_images',
 'get_in_layout',
 'get_label',
 'get_legend',
 'get_legend_handles_labels',
 'get_lines',
 'get_mouseover',
 'get_navigate',
 'get_navigate_mode',
 'get_path_effects',
 'get_picker',
 'get_position',
 'get_rasterization_zorder',
 'get_rasterized',
 'get_renderer_cache',
 'get_shared_x_axes',
 'get_shared_y_axes',
 'get_sketch_params',
 'get_snap',
 'get_subplotspec',
 'get_tightbbox',
 'get_title',
 'get_transform',
 'get_transformed_clip_path_and_affine',
 'get_url',
 'get_visible',
 'get_window_extent',
 'get_xaxis',
 'get_xaxis_text1_transform',
 'get_xaxis_text2_transform',
 'get_xaxis_transform',
 'get_xbound',
 'get_xgridlines',
 'get_xlabel',
 'get_xlim',
 'get_xmajorticklabels',
 'get_xminorticklabels',
 'get_xscale',
 'get_xticklabels',
 'get_xticklines',
 'get_xticks',
 'get_yaxis',
 'get_yaxis_text1_transform',
 'get_yaxis_text2_transform',
 'get_yaxis_transform',
 'get_ybound',
 'get_ygridlines',
 'get_ylabel',
 'get_ylim',
 'get_ymajorticklabels',
 'get_yminorticklabels',
 'get_yscale',
 'get_yticklabels',
 'get_yticklines',
 'get_yticks',
 'get_zorder',
 'grid',
 'has_data',
 'have_units',
 'hexbin',
 'hist',
 'hist2d',
 'hlines',
 'ignore_existing_data_limits',
 'images',
 'imshow',
 'in_axes',
 'indicate_inset',
 'indicate_inset_zoom',
 'inset_axes',
 'invert_xaxis',
 'invert_yaxis',
 'is_transform_set',
 'label_outer',
 'legend',
 'legend_',
 'lines',
 'locator_params',
 'loglog',
 'magnitude_spectrum',
 'margins',
 'matshow',
 'minorticks_off',
 'minorticks_on',
 'mouseover',
 'name',
 'patch',
 'patches',
 'pchanged',
 'pcolor',
 'pcolorfast',
 'pcolormesh',
 'phase_spectrum',
 'pick',
 'pickable',
 'pie',
 'plot',
 'plot_date',
 'properties',
 'psd',
 'quiver',
 'quiverkey',
 'redraw_in_frame',
 'relim',
 'remove',
 'remove_callback',
 'reset_position',
 'scatter',
 'secondary_xaxis',
 'secondary_yaxis',
 'semilogx',
 'semilogy',
 'set',
 'set_adjustable',
 'set_agg_filter',
 'set_alpha',
 'set_anchor',
 'set_animated',
 'set_aspect',
 'set_autoscale_on',
 'set_autoscalex_on',
 'set_autoscaley_on',
 'set_axes_locator',
 'set_axis_off',
 'set_axis_on',
 'set_axisbelow',
 'set_box_aspect',
 'set_clip_box',
 'set_clip_on',
 'set_clip_path',
 'set_facecolor',
 'set_fc',
 'set_figure',
 'set_frame_on',
 'set_gid',
 'set_in_layout',
 'set_label',
 'set_mouseover',
 'set_navigate',
 'set_navigate_mode',
 'set_path_effects',
 'set_picker',
 'set_position',
 'set_prop_cycle',
 'set_rasterization_zorder',
 'set_rasterized',
 'set_sketch_params',
 'set_snap',
 'set_subplotspec',
 'set_title',
 'set_transform',
 'set_url',
 'set_visible',
 'set_xbound',
 'set_xlabel',
 'set_xlim',
 'set_xmargin',
 'set_xscale',
 'set_xticklabels',
 'set_xticks',
 'set_ybound',
 'set_ylabel',
 'set_ylim',
 'set_ymargin',
 'set_yscale',
 'set_yticklabels',
 'set_yticks',
 'set_zorder',
 'sharex',
 'sharey',
 'specgram',
 'spines',
 'spy',
 'stackplot',
 'stairs',
 'stale',
 'stale_callback',
 'start_pan',
 'stem',
 'step',
 'sticky_edges',
 'streamplot',
 'table',
 'tables',
 'text',
 'texts',
 'tick_params',
 'ticklabel_format',
 'title',
 'titleOffsetTrans',
 'transAxes',
 'transData',
 'transLimits',
 'transScale',
 'tricontour',
 'tricontourf',
 'tripcolor',
 'triplot',
 'twinx',
 'twiny',
 'update',
 'update_datalim',
 'update_from',
 'use_sticky_edges',
 'viewLim',
 'violin',
 'violinplot',
 'vlines',
 'xaxis',
 'xaxis_date',
 'xaxis_inverted',
 'xcorr',
 'yaxis',
 'yaxis_date',
 'yaxis_inverted',
 'zorder']
x = np.arange(10)
y = np.sin(x)
ax.plot(x, y)
fig
_images/dccce60f0e8d797a7ece39a002f2cfc53db7bed78bb64ddf76f29e142aee12c4.png
x = np.arange(0, 10, .1)
y = np.sin(x)
ax.plot(x, y)
fig
_images/230e98bf9a9875fba1cda593400dfbdef5297d9e4f38ec22d77ffefb800a35cc.png
dir(fig)
['_PROPERTIES_EXCLUDED_FROM_SET',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_axes_internal',
 '_agg_filter',
 '_align_label_groups',
 '_alpha',
 '_animated',
 '_axobservers',
 '_axstack',
 '_button_pick_id',
 '_callbacks',
 '_canvas_callbacks',
 '_check_layout_engines_compat',
 '_clipon',
 '_clippath',
 '_cm_set',
 '_default_contains',
 '_dpi',
 '_fig_callbacks',
 '_fully_clipped_to_axes',
 '_gci',
 '_get_dpi',
 '_get_draw_artists',
 '_get_renderer',
 '_gid',
 '_in_layout',
 '_internal_update',
 '_label',
 '_layout_engine',
 '_localaxes',
 '_mouse_key_ids',
 '_mouseover',
 '_norm_per_subplot_kw',
 '_normalize_grid_string',
 '_original_dpi',
 '_path_effects',
 '_picker',
 '_process_projection_requirements',
 '_rasterized',
 '_remove_method',
 '_repr_html_',
 '_scroll_pick_id',
 '_set_alpha_for_array',
 '_set_artist_props',
 '_set_dpi',
 '_set_gc_clip',
 '_sketch',
 '_snap',
 '_stale',
 '_sticky_edges',
 '_suplabels',
 '_suptitle',
 '_supxlabel',
 '_supylabel',
 '_transform',
 '_transformSet',
 '_update_props',
 '_update_set_signature_and_docstring',
 '_url',
 '_visible',
 'add_artist',
 'add_axes',
 'add_axobserver',
 'add_callback',
 'add_gridspec',
 'add_subfigure',
 'add_subplot',
 'align_labels',
 'align_xlabels',
 'align_ylabels',
 'artists',
 'autofmt_xdate',
 'axes',
 'bbox',
 'bbox_inches',
 'callbacks',
 'canvas',
 'clear',
 'clf',
 'clipbox',
 'colorbar',
 'contains',
 'convert_xunits',
 'convert_yunits',
 'delaxes',
 'dpi',
 'dpi_scale_trans',
 'draw',
 'draw_artist',
 'draw_without_rendering',
 'execute_constrained_layout',
 'figbbox',
 'figimage',
 'figure',
 'findobj',
 'format_cursor_data',
 'frameon',
 'gca',
 'get_agg_filter',
 'get_alpha',
 'get_animated',
 'get_axes',
 'get_children',
 'get_clip_box',
 'get_clip_on',
 'get_clip_path',
 'get_constrained_layout',
 'get_constrained_layout_pads',
 'get_cursor_data',
 'get_default_bbox_extra_artists',
 'get_dpi',
 'get_edgecolor',
 'get_facecolor',
 'get_figheight',
 'get_figure',
 'get_figwidth',
 'get_frameon',
 'get_gid',
 'get_in_layout',
 'get_label',
 'get_layout_engine',
 'get_linewidth',
 'get_mouseover',
 'get_path_effects',
 'get_picker',
 'get_rasterized',
 'get_size_inches',
 'get_sketch_params',
 'get_snap',
 'get_tight_layout',
 'get_tightbbox',
 'get_transform',
 'get_transformed_clip_path_and_affine',
 'get_url',
 'get_visible',
 'get_window_extent',
 'get_zorder',
 'ginput',
 'have_units',
 'images',
 'is_transform_set',
 'legend',
 'legends',
 'lines',
 'mouseover',
 'number',
 'patch',
 'patches',
 'pchanged',
 'pick',
 'pickable',
 'properties',
 'remove',
 'remove_callback',
 'savefig',
 'sca',
 'set',
 'set_agg_filter',
 'set_alpha',
 'set_animated',
 'set_canvas',
 'set_clip_box',
 'set_clip_on',
 'set_clip_path',
 'set_constrained_layout',
 'set_constrained_layout_pads',
 'set_dpi',
 'set_edgecolor',
 'set_facecolor',
 'set_figheight',
 'set_figure',
 'set_figwidth',
 'set_frameon',
 'set_gid',
 'set_in_layout',
 'set_label',
 'set_layout_engine',
 'set_linewidth',
 'set_mouseover',
 'set_path_effects',
 'set_picker',
 'set_rasterized',
 'set_size_inches',
 'set_sketch_params',
 'set_snap',
 'set_tight_layout',
 'set_transform',
 'set_url',
 'set_visible',
 'set_zorder',
 'show',
 'stale',
 'stale_callback',
 'sticky_edges',
 'subfigs',
 'subfigures',
 'subplot_mosaic',
 'subplotpars',
 'subplots',
 'subplots_adjust',
 'suppressComposite',
 'suptitle',
 'supxlabel',
 'supylabel',
 'text',
 'texts',
 'tight_layout',
 'transFigure',
 'transSubfigure',
 'update',
 'update_from',
 'waitforbuttonpress',
 'zorder']
fig.savefig('sample.png')

Other attributes of axes

ax.set_title('Sample plot')
ax.set_xlabel('some x values')
ax.set_ylabel('some y values')
fig
_images/88985fda01abdb10adf7f6ca99b25748a465946719c2b9a262f7ac19158b84f5.png

Pandas#

import pandas as pd
s = pd.Series([0.1, 0.2, 0.3, 0.4])
s.index
RangeIndex(start=0, stop=4, step=1)
s.values
array([0.1, 0.2, 0.3, 0.4])
s[0:2]
0    0.1
1    0.2
dtype: float64
s = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 4})
s
a    1
b    2
c    3
d    4
dtype: int64
s['a': 'c']
a    1
b    2
c    3
dtype: int64
s[['a', 'c']]
a    1
c    3
dtype: int64
s
a    1
b    2
c    3
d    4
dtype: int64
s + s
a    2
b    4
c    6
d    8
dtype: int64
s > 2
a    False
b    False
c     True
d     True
dtype: bool
s[[False, False, True, True]]
c    3
d    4
dtype: int64
s[s>2]
c    3
d    4
dtype: int64
dir(s)
['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rfloordiv__',
 '__rmatmul__',
 '__rmod__',
 '__rmul__',
 '__ror__',
 '__round__',
 '__rpow__',
 '__rsub__',
 '__rtruediv__',
 '__rxor__',
 '__setattr__',
 '__setitem__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '__xor__',
 '_accessors',
 '_accum_func',
 '_add_numeric_operations',
 '_agg_by_level',
 '_agg_examples_doc',
 '_agg_see_also_doc',
 '_align_frame',
 '_align_series',
 '_append',
 '_arith_method',
 '_as_manager',
 '_attrs',
 '_binop',
 '_can_hold_na',
 '_check_inplace_and_allows_duplicate_labels',
 '_check_inplace_setting',
 '_check_is_chained_assignment_possible',
 '_check_label_or_level_ambiguity',
 '_check_setitem_copy',
 '_clear_item_cache',
 '_clip_with_one_bound',
 '_clip_with_scalar',
 '_cmp_method',
 '_consolidate',
 '_consolidate_inplace',
 '_construct_axes_dict',
 '_construct_axes_from_arguments',
 '_construct_result',
 '_constructor',
 '_constructor_expanddim',
 '_convert',
 '_convert_dtypes',
 '_data',
 '_dir_additions',
 '_dir_deletions',
 '_drop_axis',
 '_drop_labels_or_levels',
 '_duplicated',
 '_find_valid_index',
 '_flags',
 '_get_axis',
 '_get_axis_name',
 '_get_axis_number',
 '_get_axis_resolvers',
 '_get_block_manager_axis',
 '_get_bool_data',
 '_get_cacher',
 '_get_cleaned_column_resolvers',
 '_get_index_resolvers',
 '_get_label_or_level_values',
 '_get_numeric_data',
 '_get_value',
 '_get_values',
 '_get_values_tuple',
 '_get_with',
 '_gotitem',
 '_hidden_attrs',
 '_indexed_same',
 '_info_axis',
 '_info_axis_name',
 '_info_axis_number',
 '_init_dict',
 '_init_mgr',
 '_inplace_method',
 '_internal_names',
 '_internal_names_set',
 '_is_cached',
 '_is_copy',
 '_is_label_or_level_reference',
 '_is_label_reference',
 '_is_level_reference',
 '_is_mixed_type',
 '_is_view',
 '_item_cache',
 '_ixs',
 '_logical_func',
 '_logical_method',
 '_map_values',
 '_maybe_update_cacher',
 '_memory_usage',
 '_metadata',
 '_mgr',
 '_min_count_stat_function',
 '_name',
 '_needs_reindex_multi',
 '_protect_consolidate',
 '_reduce',
 '_reindex_axes',
 '_reindex_indexer',
 '_reindex_multi',
 '_reindex_with_indexers',
 '_rename',
 '_replace_single',
 '_repr_data_resource_',
 '_repr_latex_',
 '_reset_cache',
 '_reset_cacher',
 '_set_as_cached',
 '_set_axis',
 '_set_axis_name',
 '_set_axis_nocheck',
 '_set_is_copy',
 '_set_labels',
 '_set_name',
 '_set_value',
 '_set_values',
 '_set_with',
 '_set_with_engine',
 '_slice',
 '_stat_axis',
 '_stat_axis_name',
 '_stat_axis_number',
 '_stat_function',
 '_stat_function_ddof',
 '_take',
 '_take_with_is_copy',
 '_typ',
 '_update_inplace',
 '_validate_dtype',
 '_values',
 '_where',
 'a',
 'abs',
 'add',
 'add_prefix',
 'add_suffix',
 'agg',
 'aggregate',
 'align',
 'all',
 'any',
 'append',
 'apply',
 'argmax',
 'argmin',
 'argsort',
 'array',
 'asfreq',
 'asof',
 'astype',
 'at',
 'at_time',
 'attrs',
 'autocorr',
 'axes',
 'b',
 'backfill',
 'between',
 'between_time',
 'bfill',
 'bool',
 'c',
 'clip',
 'combine',
 'combine_first',
 'compare',
 'convert_dtypes',
 'copy',
 'corr',
 'count',
 'cov',
 'cummax',
 'cummin',
 'cumprod',
 'cumsum',
 'd',
 'describe',
 'diff',
 'div',
 'divide',
 'divmod',
 'dot',
 'drop',
 'drop_duplicates',
 'droplevel',
 'dropna',
 'dtype',
 'dtypes',
 'duplicated',
 'empty',
 'eq',
 'equals',
 'ewm',
 'expanding',
 'explode',
 'factorize',
 'ffill',
 'fillna',
 'filter',
 'first',
 'first_valid_index',
 'flags',
 'floordiv',
 'ge',
 'get',
 'groupby',
 'gt',
 'hasnans',
 'head',
 'hist',
 'iat',
 'idxmax',
 'idxmin',
 'iloc',
 'index',
 'infer_objects',
 'info',
 'interpolate',
 'is_monotonic',
 'is_monotonic_decreasing',
 'is_monotonic_increasing',
 'is_unique',
 'isin',
 'isna',
 'isnull',
 'item',
 'items',
 'iteritems',
 'keys',
 'kurt',
 'kurtosis',
 'last',
 'last_valid_index',
 'le',
 'loc',
 'lt',
 'mad',
 'map',
 'mask',
 'max',
 'mean',
 'median',
 'memory_usage',
 'min',
 'mod',
 'mode',
 'mul',
 'multiply',
 'name',
 'nbytes',
 'ndim',
 'ne',
 'nlargest',
 'notna',
 'notnull',
 'nsmallest',
 'nunique',
 'pad',
 'pct_change',
 'pipe',
 'plot',
 'pop',
 'pow',
 'prod',
 'product',
 'quantile',
 'radd',
 'rank',
 'ravel',
 'rdiv',
 'rdivmod',
 'reindex',
 'reindex_like',
 'rename',
 'rename_axis',
 'reorder_levels',
 'repeat',
 'replace',
 'resample',
 'reset_index',
 'rfloordiv',
 'rmod',
 'rmul',
 'rolling',
 'round',
 'rpow',
 'rsub',
 'rtruediv',
 'sample',
 'searchsorted',
 'sem',
 'set_axis',
 'set_flags',
 'shape',
 'shift',
 'size',
 'skew',
 'slice_shift',
 'sort_index',
 'sort_values',
 'squeeze',
 'std',
 'sub',
 'subtract',
 'sum',
 'swapaxes',
 'swaplevel',
 'tail',
 'take',
 'to_clipboard',
 'to_csv',
 'to_dict',
 'to_excel',
 'to_frame',
 'to_hdf',
 'to_json',
 'to_latex',
 'to_list',
 'to_markdown',
 'to_numpy',
 'to_period',
 'to_pickle',
 'to_sql',
 'to_string',
 'to_timestamp',
 'to_xarray',
 'transform',
 'transpose',
 'truediv',
 'truncate',
 'tz_convert',
 'tz_localize',
 'unique',
 'unstack',
 'update',
 'value_counts',
 'values',
 'var',
 'view',
 'where',
 'xs']
s.mean()
2.5
s.describe()
count    4.000000
mean     2.500000
std      1.290994
min      1.000000
25%      1.750000
50%      2.500000
75%      3.250000
max      4.000000
dtype: float64

Dataframes#

data = {
    'country': ['Belgium', 'France', 'Germany', 'Netherlands', 'United Kingdom'],
    'population': [11.3, 64.3, 81.3, 16.9, 64.9],
    'area': [30510, 671308, 357050, 41526, 244820],
    'capital': ['Brussels', 'Paris', 'Berlin', 'Amsterdam', 'London']
}
df = pd.DataFrame(data)
df
country population area capital
0 Belgium 11.3 30510 Brussels
1 France 64.3 671308 Paris
2 Germany 81.3 357050 Berlin
3 Netherlands 16.9 41526 Amsterdam
4 United Kingdom 64.9 244820 London
df['country']
0           Belgium
1            France
2           Germany
3       Netherlands
4    United Kingdom
Name: country, dtype: object
df.columns
Index(['country', 'population', 'area', 'capital'], dtype='object')
df.set_index('country')  # does not change original data
population area capital
country
Belgium 11.3 30510 Brussels
France 64.3 671308 Paris
Germany 81.3 357050 Berlin
Netherlands 16.9 41526 Amsterdam
United Kingdom 64.9 244820 London
df
country population area capital
0 Belgium 11.3 30510 Brussels
1 France 64.3 671308 Paris
2 Germany 81.3 357050 Berlin
3 Netherlands 16.9 41526 Amsterdam
4 United Kingdom 64.9 244820 London

To save the changes, either:

  • reassign

  • use inplace option

# df = df.set_index('country') # reassign
df.set_index('country', inplace=True)
df
population area capital
country
Belgium 11.3 30510 Brussels
France 64.3 671308 Paris
Germany 81.3 357050 Berlin
Netherlands 16.9 41526 Amsterdam
United Kingdom 64.9 244820 London

Select an individual element: what is the capital of France?

df['capital']['France']
'Paris'
df.loc['France', 'capital']
'Paris'
df['density'] = df['population'] / df['area'] * 10**6
df
population area capital density
country
Belgium 11.3 30510 Brussels 370.370370
France 64.3 671308 Paris 95.783158
Germany 81.3 357050 Berlin 227.699202
Netherlands 16.9 41526 Amsterdam 406.973944
United Kingdom 64.9 244820 London 265.092721
df['density'] > 300
country
Belgium            True
France            False
Germany           False
Netherlands        True
United Kingdom    False
Name: density, dtype: bool
df[df['density'] > 300]
population area capital density
country
Belgium 11.3 30510 Brussels 370.370370
Netherlands 16.9 41526 Amsterdam 406.973944
df.sort_values('area')
population area capital density
country
Belgium 11.3 30510 Brussels 370.370370
Netherlands 16.9 41526 Amsterdam 406.973944
United Kingdom 64.9 244820 London 265.092721
Germany 81.3 357050 Berlin 227.699202
France 64.3 671308 Paris 95.783158

What is the mean population of the EU countries in this collection?

df.index
Index(['Belgium', 'France', 'Germany', 'Netherlands', 'United Kingdom'], dtype='object', name='country')
eu_countries = df.index != 'United Kingdom'
eu_countries
array([ True,  True,  True,  True, False])
df.loc[eu_countries]
population area capital density
country
Belgium 11.3 30510 Brussels 370.370370
France 64.3 671308 Paris 95.783158
Germany 81.3 357050 Berlin 227.699202
Netherlands 16.9 41526 Amsterdam 406.973944
df.loc[eu_countries]['population']
country
Belgium        11.3
France         64.3
Germany        81.3
Netherlands    16.9
Name: population, dtype: float64
df.loc[eu_countries]['population'].mean()
43.449999999999996
df.loc[df.index != 'United Kingdom', 'population'].mean()
43.449999999999996
df.plot()
<Axes: xlabel='country'>
_images/57bdfd995333b5d9e408798480550191149546e55a2f1ba5fd837dc8f294f6e3.png
df['area'] /= 1000
df
population area capital density
country
Belgium 11.3 30.510 Brussels 370.370370
France 64.3 671.308 Paris 95.783158
Germany 81.3 357.050 Berlin 227.699202
Netherlands 16.9 41.526 Amsterdam 406.973944
United Kingdom 64.9 244.820 London 265.092721
df.plot()
<Axes: xlabel='country'>
_images/9ddad66b6d474c8e413e84908890e77dd5bff4ca4236dfe72bc7ebb482a3ea23.png
df.plot(kind='bar')
<Axes: xlabel='country'>
_images/863d73cd6986f66992849014728aa3c06d5d84b4e872799f1810c107f771b515.png

export/import#

df.to_csv('countries.csv')
!cat countries.csv
country,population,area,capital,density
Belgium,11.3,30.51,Brussels,370.3703703703704
France,64.3,671.308,Paris,95.78315765639617
Germany,81.3,357.05,Berlin,227.69920179246603
Netherlands,16.9,41.526,Amsterdam,406.97394403506235
United Kingdom,64.9,244.82,London,265.09272118290994
# !conda install --yes openpyxl
df.to_excel('countries.xlsx')
!ls countries*
countries.csv  countries.xlsx
pd.read_csv('countries.csv')
country population area capital density
0 Belgium 11.3 30.510 Brussels 370.370370
1 France 64.3 671.308 Paris 95.783158
2 Germany 81.3 357.050 Berlin 227.699202
3 Netherlands 16.9 41.526 Amsterdam 406.973944
4 United Kingdom 64.9 244.820 London 265.092721
pd.read_excel('countries.xlsx')
country population area capital density
0 Belgium 11.3 30.510 Brussels 370.370370
1 France 64.3 671.308 Paris 95.783158
2 Germany 81.3 357.050 Berlin 227.699202
3 Netherlands 16.9 41.526 Amsterdam 406.973944
4 United Kingdom 64.9 244.820 London 265.092721