×

Welcome to TagMyCode

Please login or create account to add a snippet.
0
0
 
1
Language: Python
Posted by: Roman Ignatov
Added: Nov 30, 2016 10:57 AM
Views: 26
Tags: pandas
  1. # see http://www.swegler.com/becky/blog/2014/08/06/useful-pandas-snippets/
  2.  
  3. #List unique values in a DataFrame column
  4. pd.unique(df.column_name.ravel())
  5.  
  6. #Convert Series datatype to numeric, getting rid of any non-numeric values
  7. df['col'] = df['col'].astype(str).convert_objects(convert_numeric=True)
  8.  
  9. #Grab DataFrame rows where column has certain values
  10. valuelist = ['value1', 'value2', 'value3']
  11. df = df[df.column.isin(value_list)]
  12.  
  13. #Grab DataFrame rows where column doesn't have certain values
  14. valuelist = ['value1', 'value2', 'value3']
  15. df = df[~df.column.isin(value_list)]
  16.  
  17. #Delete column from DataFrame
  18. del df['column']
  19.  
  20. #Select from DataFrame using criteria from multiple columns
  21. newdf = df[(df['column_one']>2004) & (df['column_two']==9)]
  22.  
  23. #Rename several DataFrame columns
  24. df = df.rename(columns = {
  25.     'col1 old name':'col1 new name',
  26.     'col2 old name':'col2 new name',
  27.     'col3 old name':'col3 new name',
  28. })
  29.  
  30. #lower-case all DataFrame column names
  31. df.columns = map(str.lower, df.columns)
  32.  
  33. #even more fancy DataFrame column re-naming
  34. #lower-case all DataFrame column names (for example)
  35. df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
  36.  
  37. #Loop through rows in a DataFrame
  38. #(if you must)
  39. for index, row in df:
  40.     print index, row['some column']  
  41.  
  42. #Lower-case everything in a DataFrame column
  43. df.column_name = df.column_name.str.lower()
  44.  
  45. #Sort dataframe by multiple columns
  46. df = df.sort(['col1','col2','col3'],ascending=[1,1,0])
  47.  
  48. #get top n for each group of columns in a sorted dataframe
  49. #(make sure dataframe is sorted first)
  50. top5 = df.groupby(['groupingcol1', 'groupingcol2']).head(5)
  51.  
  52. #Grab DataFrame rows where specific column is null/notnull
  53. newdf = df[df['column'].isnull()]
  54. len(newdf)
  55.  
  56. #select from DataFrame using multiple keys of a hierarchical index
  57. df.xs(('index level 1 value','index level 2 value'), level=('level 1','level 2'))
  58.  
  59. #Change all NaNs to None (useful before
  60. #loading to a db)
  61. df = df.where((pd.notnull(df)), None)
  62.  
  63. #Slice values in a DataFrame column (aka Series)
  64. df.column.str[0:2]
  65.  
  66. #Get quick count of rows in a DataFrame
  67. len(df.index)
  68.  
  69. #Pivot data (with flexibility about what what
  70. #becomes a column and what stays a row).
  71. #Syntax works on Pandas >= .14
  72. pd.pivot_table(
  73.   df,values='cell_value',
  74.   index=['col1', 'col2', 'col3'], #these stay as columns
  75.   columns=['col4']) #data values in this column become their own column
  76.  
  77. #change data type of DataFrame column
  78. df.column_name = df.column_name.astype(np.int64)
  79.  
  80. # Get rid of non-numeric values throughout a DataFrame:
  81. for col in refunds.columns.values:
  82.   refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True)
  83.  
  84. #Set DataFrame column values based on other column values
  85. df['column_to_change'][(df['column1'] == some_value) & (df['column2'] == some_other_value)] = new_value
  86.  
  87. #Clean up missing values in multiple DataFrame columns
  88. df = df.fillna({
  89.     'col1': 'missing',
  90.     'col2': '99.999',
  91.     'col3': '999',
  92.     'col4': 'missing',
  93.     'col5': 'missing',
  94.     'col6': '99'
  95. })
  96.  
  97. #Doing calculations with DataFrame columns that have missing values
  98. #In example below, swap in 0 for df['col1'] cells that contain null
  99. df['new_col'] = np.where(pd.isnull(df['col1']),0,df['col1']) + df['col2']
  100.  
  101. # Split delimited values in a DataFrame column into two new columns
  102. df['new_col1'], df['new_col2'] = zip(*df['original_col'].apply(lambda x: x.split(': ', 1)))
  103.  
  104. #Convert Django queryset to DataFrame
  105. #convert queryset to dataframe
  106. qs = DjangoModelName.objects.all()
  107. q = qs.values()
  108. df = pd.DataFrame.from_records(q)