Documentation ¶
Index ¶
- func WriteCsv(df DataFrame, pathToFile string) (os.FileInfo, error)
- func WriteExcel(df DataFrame, pathToFile string) (os.FileInfo, error)
- func WriteJson(df DataFrame, pathToFile string) (os.FileInfo, error)
- type DataFrame
- func NewDataFrame(data [][]interface{}, columns []string, indexCols []string) (DataFrame, error)
- func ReadCsv(pathToFile string, indexCols []string) (DataFrame, error)
- func ReadExcel(pathToFile, sheetName string, axis int) (DataFrame, error)
- func ReadJsonByColumns(pathToFile string, indexCols []string) (DataFrame, error)
- func ReadJsonStream(pathToFile string, indexCols []string) (DataFrame, error)
- func (df *DataFrame) ColAdd(colname string, value float64) (DataFrame, error)
- func (df *DataFrame) ColDiv(colname string, value float64) (DataFrame, error)
- func (df *DataFrame) ColEq(colname string, value float64) (DataFrame, error)
- func (df *DataFrame) ColGt(colname string, value float64) (DataFrame, error)
- func (df *DataFrame) ColLt(colname string, value float64) (DataFrame, error)
- func (df *DataFrame) ColMod(colname string, value float64) (DataFrame, error)
- func (df *DataFrame) ColMul(colname string, value float64) (DataFrame, error)
- func (df *DataFrame) ColSub(colname string, value float64) (DataFrame, error)
- func (df *DataFrame) DropNaN(axis int) (DataFrame, error)
- func (df *DataFrame) GroupBy(by ...string) (GroupBy, error)
- func (df *DataFrame) Head(howMany int)
- func (df *DataFrame) Loc(cols []string, rows ...[]interface{}) (DataFrame, error)
- func (df *DataFrame) LocCols(cols ...string) (DataFrame, error)
- func (df *DataFrame) LocColsItems(cols ...string) ([][]interface{}, error)
- func (df *DataFrame) LocRows(rows ...[]interface{}) (DataFrame, error)
- func (df *DataFrame) LocRowsItems(rows ...[]interface{}) ([][]interface{}, error)
- func (df *DataFrame) MarshalJSON() ([]byte, error)
- func (df *DataFrame) Melt(colName, valueName string) (DataFrame, error)
- func (df *DataFrame) MergeDfsHorizontally(target DataFrame) (DataFrame, error)
- func (df *DataFrame) MergeDfsVertically(target DataFrame) (DataFrame, error)
- func (df *DataFrame) NewCol(colname string, data []interface{}) (DataFrame, error)
- func (df *DataFrame) NewDerivedCol(colname, srcCol string) (DataFrame, error)
- func (df *DataFrame) Pivot(column, value string) (DataFrame, error)
- func (df *DataFrame) PivotTable(index, column, value string, aggFunc StatsFunc) (DataFrame, error)
- func (df *DataFrame) Print()
- func (df *DataFrame) PrintRange(start, end int)
- func (df *DataFrame) RenameCol(colnames map[string]string) error
- func (df *DataFrame) SortByColumns()
- func (df *DataFrame) SortByIndex(ascending bool) error
- func (df *DataFrame) SortByValues(by string, ascending bool) error
- func (df *DataFrame) SortIndexColFirst()
- func (df *DataFrame) Tail(howMany int)
- type GroupBy
- type Index
- type IndexData
- type Series
- func (s *Series) At(ind ...interface{}) (interface{}, error)
- func (s *Series) Count() StatsResult
- func (s *Series) Describe() ([]float64, error)
- func (s *Series) Head(howMany int)
- func (s *Series) IAt(ind int) (interface{}, error)
- func (s *Series) ILoc(min, max int) ([]interface{}, error)
- func (s *Series) IndexHasDuplicateValues() (bool, error)
- func (s Series) Len() int
- func (s Series) Less(i, j int) bool
- func (s *Series) Loc(idx ...[]interface{}) (Series, error)
- func (s *Series) LocItems(idx ...[]interface{}) ([]interface{}, error)
- func (s *Series) Max() StatsResult
- func (s *Series) Mean() StatsResult
- func (s *Series) Median() StatsResult
- func (s *Series) Min() StatsResult
- func (s *Series) Print()
- func (s *Series) PrintRange(start, end int)
- func (s *Series) Q1() StatsResult
- func (s *Series) Q2() StatsResult
- func (s *Series) Q3() StatsResult
- func (s *Series) RenameCol(newName string)
- func (s *Series) RenameIndex(newNames map[string]string) error
- func (s *Series) SortByGivenIndex(index IndexData) error
- func (s *Series) SortByIndex(ascending bool) error
- func (s *Series) SortByValues(ascending bool) error
- func (s *Series) Std() StatsResult
- func (s Series) Swap(i, j int)
- func (s *Series) Tail(howMany int)
- func (s *Series) ValueCounts() (Series, error)
- type StatsFunc
- type StatsResult
- func Count(dataset []interface{}) StatsResult
- func Max(dataset []interface{}) StatsResult
- func Mean(dataset []interface{}) StatsResult
- func Median(dataset []interface{}) StatsResult
- func Min(dataset []interface{}) StatsResult
- func Q1(dataset []interface{}) StatsResult
- func Q2(dataset []interface{}) StatsResult
- func Q3(dataset []interface{}) StatsResult
- func Std(dataset []interface{}) StatsResult
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func WriteCsv ¶
WriteCsv writes a DataFrame object to CSV file. It is recommended to generate pathToFile using `filepath.Join`.
func WriteExcel ¶ added in v0.1.0
WriteExcel writes a DataFrame object into an Excel file.
Types ¶
type DataFrame ¶
type DataFrame struct {
// contains filtered or unexported fields
}
DataFrame type represents a 2D tabular dataset. A DataFrame object is comprised of multiple Series objects.
func NewDataFrame ¶
NewDataFrame creates a new DataFrame object from the given parameters. Generally, NewDataFrameFromFile will be used more often.
func ReadCsv ¶
ReadCsv reads a CSV file and returns a new DataFrame object. It is recommended to generate pathToFile using `filepath.Join`.
func ReadExcel ¶ added in v0.1.0
ReadExcel reads an Excel file and converts it to a DataFrame object. The axis depends on the layout of the data. Row-based data, where each group represents a row, uses axis=0. Column-based data, where each group represents a column, uses axis=1.
func ReadJsonByColumns ¶
ReadJsonByColumns reads a JSON file and returns a new DataFrame object. It is recommended to generate pathToFile using `filepath.Join`. The JSON file should be in this format: {"col1":[val1, val2, ...], "col2":[val1, val2, ...], ...} You can either set a column to be the index, or set it as nil. If nil, a new RangeIndex will be created. Your index column should not have any missing values. Order of columns is not guaranteed, but the index column will always come first.
func ReadJsonStream ¶
ReadJsonStream reads a JSON stream and returns a new DataFrame object. The JSON file should be in this format: {"col1":val1, "col2":val2, ...}{"col1":val1, "col2":val2, ...}
func (*DataFrame) ColDiv ¶
ColDiv() divides each element in the specified column by the given value.
func (*DataFrame) ColEq ¶
ColEq() checks if each element in the specified column is equal to the given value.
func (*DataFrame) ColGt ¶
ColGt() checks if each element in the specified column is greater than the given value.
func (*DataFrame) ColLt ¶
ColLt() checks if each element in the specified column is less than the given value.
func (*DataFrame) ColMod ¶
ColMod() applies modulus calculations on each element in the specified column, returning the remainder.
func (*DataFrame) ColMul ¶
ColMul() multiplies each element in the specified column by the given value.
func (*DataFrame) ColSub ¶
ColSub() subtracts the given value from each element in the specified column.
func (*DataFrame) DropNaN ¶
DropNaN drops rows or columns with NaN values. Specify axis to choose whether to remove rows with NaN or columns with NaN. axis=0 is row, axis=1 is column.
func (*DataFrame) GroupBy ¶
GroupBy groups selected columns in a DataFrame object and returns a GroupBy object.
func (*DataFrame) LocCols ¶
LocCols returns a set of columns as a new DataFrame object, given a list of labels.
func (*DataFrame) LocColsItems ¶
LocColsItems will return a slice of columns. Use this over LocCols if you want to extract the items directly instead of getting a DataFrame object.
func (*DataFrame) LocRows ¶
LocRows returns a set of rows as a new DataFrame object, given a list of labels.
func (*DataFrame) LocRowsItems ¶
LocRowsItems will return a slice of rows. Use this over LocRows if you want to extract the items directly instead of getting a DataFrame object.
func (*DataFrame) MarshalJSON ¶
MarshalJSON is used to implement the json.Marshaler interface.
func (*DataFrame) Melt ¶
Melt converts the table from wide to long format. Use Melt to revert to pre-Pivot format.
func (*DataFrame) MergeDfsHorizontally ¶ added in v0.1.0
MergeDfsHorizontally merges two DataFrame objects side by side. The target DataFrame will always be appended to the right of the source DataFrame. Index will reset and become a RangeIndex.
func (*DataFrame) MergeDfsVertically ¶ added in v0.1.0
MergeDfsVertically stacks two DataFrame objects vertically.
func (*DataFrame) NewCol ¶
NewCol creates a new column with the given data and column name. To create a blank column, pass in a slice with empty string values like so: []interface{}{"", "", "", ...}
func (*DataFrame) NewDerivedCol ¶
NewDerivedCol creates a new column derived from an existing column. It copies over the data from a column named srcCol into a new column. You can then apply column operations such as ColAdd to the new column.
func (*DataFrame) Pivot ¶
Pivot returns an organized dataframe that has values corresponding to the index and the given column.
func (*DataFrame) PivotTable ¶
PivotTable rearranges the data by a given index and column. Each value will be aggregated via an aggregation function. Pick three columns from the DataFrame, each to serve as the index, column, and value. PivotTable ignores NaN values.
func (*DataFrame) PrintRange ¶
PrintRange prints the rows in a given range. Index starts at 0. For example, to print 3 rows starting from index 2, use PrintRange(2, 5).
func (*DataFrame) SortByColumns ¶
func (df *DataFrame) SortByColumns()
SortByColumns sorts the columns of the DataFrame object.
func (*DataFrame) SortByIndex ¶
SortByIndex sorts the items by index.
func (*DataFrame) SortByValues ¶
SortByValues sorts the items by values in a selected series.
func (*DataFrame) SortIndexColFirst ¶
func (df *DataFrame) SortIndexColFirst()
SortIndexColFirst puts the index column at the front.
type GroupBy ¶
type GroupBy struct {
// contains filtered or unexported fields
}
GroupBy type is an intermediary struct that is created after running DataFrame.GroupBy(). It holds the necessary data for applying operations such as GroupBy.Agg().
type Index ¶
type Index struct {
// contains filtered or unexported fields
}
Index stores the index values of a series and dataframe. The 0th element must be the ID of the index. For example, if your data includes a column of names that you have set to be the index, the index may look like this: Index{0, "Alice"}, Index{1, "Bob"}, Index{2, "Charlie"}. Index{} with more than one value (not including the ID) is considered a multi-index.
type IndexData ¶
type IndexData struct {
// contains filtered or unexported fields
}
IndexData type is used to hold index information of a Series or a DataFrame.
func CreateRangeIndex ¶
CreateRangeIndex takes the length of an Index and creates a RangeIndex. RangeIndex is an index that spans from 0 to the length of the index.
type Series ¶
type Series struct {
// contains filtered or unexported fields
}
Series type represents a column of data.
func NewSeries ¶
NewSeries creates a new Series object from the given parameters. Generally, NewSeriesFromFile will be used more often. The index parameter can be set to nil when calling NewSeries on its own. This field is for passing in the DataFrame's index data in NewDataFrame.
func (*Series) At ¶
At returns an element at a given index. For multiindex, you need to pass in the whole index tuple.
func (*Series) Count ¶
func (s *Series) Count() StatsResult
Count counts the number of non-NA elements in a column.
func (*Series) Describe ¶
Describe runs through the most commonly used statistics functions and prints the output.
func (*Series) IndexHasDuplicateValues ¶
IndexHasDuplicateValues checks if the Series has duplicate index values.
func (*Series) LocItems ¶
LocItems returns a slice of data at given rows. Use this over Loc if you want to extract the items directly instead of getting a Series object.
func (*Series) Max ¶
func (s *Series) Max() StatsResult
Max returns the largest element in a column.
func (*Series) Mean ¶
func (s *Series) Mean() StatsResult
Mean returns the mean of the elements in a column.
func (*Series) Median ¶
func (s *Series) Median() StatsResult
Median returns the median of the elements in a column.
func (*Series) Min ¶
func (s *Series) Min() StatsResult
Min returns the smallest element in a column.
func (*Series) PrintRange ¶
PrintRange prints the elements in a given range. Index starts at 0. For example, to print 3 elements starting from index 2, use PrintRange(2, 5).
func (*Series) Q1 ¶
func (s *Series) Q1() StatsResult
Q1 returns the lower quartile (25%) of the elements in a column. This does not include the median during calculation.
func (*Series) Q2 ¶
func (s *Series) Q2() StatsResult
Q2 returns the middle quartile (50%) of the elements in a column. This accomplishes the same thing as s.Median().
func (*Series) Q3 ¶
func (s *Series) Q3() StatsResult
Q3 returns the upper quartile (75%) of the elements in a column. This does not include the median during calculation.
func (*Series) RenameIndex ¶
RenameIndex renames the index of the series. Input should be a map, where key is the index name to change and value is a new name.
func (*Series) SortByGivenIndex ¶
SortByGivenIndex sorts the Series by a given index.
func (*Series) SortByIndex ¶
SortByIndex sorts the elements in a series by the index.
func (*Series) SortByValues ¶
SortByValues sorts the Series by its values.
func (*Series) Std ¶
func (s *Series) Std() StatsResult
Std returns the sample standard deviation of the elements in a column.
func (*Series) ValueCounts ¶
ValueCounts returns a Series containing the number of occurrences of each unique value in a given Series.
type StatsFunc ¶
type StatsFunc func(dataset []interface{}) StatsResult
StatsFunc represents any function that accepts dataset as input and returns StatsResult as output.
type StatsResult ¶
StatsResult holds the results of calculation from a statistics function such as Mean or Median.
func Count ¶
func Count(dataset []interface{}) StatsResult
Count counts the number of non-NA elements in a column.
func Mean ¶
func Mean(dataset []interface{}) StatsResult
Mean returns the mean of the elements in a column.
func Median ¶
func Median(dataset []interface{}) StatsResult
Median returns the median of the elements in a column.
func Min ¶
func Min(dataset []interface{}) StatsResult
Min returns the smallest element in a column.
func Q1 ¶
func Q1(dataset []interface{}) StatsResult
Q1 returns the lower quartile (25%) of the elements in a column. This does not include the median during calculation.
func Q2 ¶
func Q2(dataset []interface{}) StatsResult
Q2 returns the middle quartile (50%) of the elements in a column. This accomplishes the same thing as Median(dataset).
func Q3 ¶
func Q3(dataset []interface{}) StatsResult
Q3 returns the upper quartile (75%) of the elements in a column. This does not include the median during calculation.
func Std ¶
func Std(dataset []interface{}) StatsResult
Std returns the sample standard deviation of the elements in a column.