dataframe

package
v0.0.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 14, 2024 License: MulanPSL-2.0 Imports: 12 Imported by: 0

Documentation

Index

Examples

Constants

View Source
const (
	XLSX int = iota
	CSV
)

Variables

This section is empty.

Functions

func ReadXLSX

func ReadXLSX(filePath string, sheets ...Sheets) (map[string]*DataFrame, error)

ReadXLSX 从XLSX中读取表格

Example
dfs, err := ReadXLSX("test.xlsx",
	Sheets{
		SheetName: "Sheet3",
		ColsType:  []series.Type{series.String, series.Int},
	},
)
if err != nil {
	panic(err)
}
for s, frame := range dfs {
	fmt.Println("Sheet Name is", s)
	fmt.Println(frame)
}
Output:

Example (A)
dfs, err := ReadXLSX("test.xlsx",
	Sheets{
		SheetName: "Sheet3",
		SRow:      5,
		SCol:      2,
		ColsType:  []series.Type{series.String, series.Int},
	},
)
if err != nil {
	panic(err)
}
for s, frame := range dfs {
	fmt.Println("Sheet Name is", s)
	fmt.Println(frame)
}
Output:

Types

type DataFrame

type DataFrame struct {
	// contains filtered or unexported fields
}

func LoadMap

func LoadMap(values map[string]any) (*DataFrame, error)

LoadMap 通过列名集合创建表

values map[string]T, T = []int、[]string、[]float64、[]bool
Example
df, _ := LoadMap(map[string]any{
	"name":  data.CreateSlice("Join", 100),
	"phone": data.CreateSlice("15963578965", 100),
})
fmt.Println(df)
Output:

+------------------------------+
|   DataFrame Size:2 x 100    |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Join   | 15963578965 |
| 2     | Join   | 15963578965 |
| 3     | Join   | 15963578965 |
| 4     | Join   | 15963578965 |
| 5     | Join   | 15963578965 |
| 6     | Join   | 15963578965 |
| 7     | Join   | 15963578965 |
| 8     | Join   | 15963578965 |
| 9     | Join   | 15963578965 |
| 10    | Join   | 15963578965 |
| 11    | Join   | 15963578965 |
| 12    | Join   | 15963578965 |
| 13    | Join   | 15963578965 |
| 14    | Join   | 15963578965 |
| 15    | Join   | 15963578965 |
| .     | .      | .           |
| .     | .      | .           |
| .     | .      | .           |
| 95    | Join   | 15963578965 |
| 96    | Join   | 15963578965 |
| 97    | Join   | 15963578965 |
| 98    | Join   | 15963578965 |
| 99    | Join   | 15963578965 |
| 100   | Join   | 15963578965 |
+-------+--------+-------------+
| Types | string | string      |
+-------+--------+-------------+

func LoadRecord

func LoadRecord(rows [][]string, colsName []string, colsType []series.Type) (*DataFrame, error)

LoadRecord 用二维字符串切片创建 DataFrame 数据对象

rows: 待输入数据
colsName:每列名称,当为 nil 时,rows[0]作为每列名称
colsType:每列数据类型
Example
df, _ := LoadRecord(
	data.CreateSlice([]string{"Join", "15963578965"}, 100),
	[]string{"name", "phone"},
	[]series.Type{series.String, series.Int},
)
fmt.Println(df)
Output:

+------------------------------+
|   DataFrame Size:2 x 100    |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Join   | 15963578965 |
| 2     | Join   | 15963578965 |
| 3     | Join   | 15963578965 |
| 4     | Join   | 15963578965 |
| 5     | Join   | 15963578965 |
| 6     | Join   | 15963578965 |
| 7     | Join   | 15963578965 |
| 8     | Join   | 15963578965 |
| 9     | Join   | 15963578965 |
| 10    | Join   | 15963578965 |
| 11    | Join   | 15963578965 |
| 12    | Join   | 15963578965 |
| 13    | Join   | 15963578965 |
| 14    | Join   | 15963578965 |
| 15    | Join   | 15963578965 |
| .     | .      | .           |
| .     | .      | .           |
| .     | .      | .           |
| 95    | Join   | 15963578965 |
| 96    | Join   | 15963578965 |
| 97    | Join   | 15963578965 |
| 98    | Join   | 15963578965 |
| 99    | Join   | 15963578965 |
| 100   | Join   | 15963578965 |
+-------+--------+-------------+
| Types | string | int         |
+-------+--------+-------------+

func New

func New(columns []any, colsName []string) (*DataFrame, error)

New 创建 DataFrame 数据对象

columns: 待输入数据,类型为 []any,每个元素可以是 series.Series / []int / []float64 / []string / []bool
colsName: 列名,当 columns 的元素为 series.Series 时可为 nil
Example
df, _ := New(
	[]any{data.CreateSlice("Join", 100), data.CreateSlice(15963578965, 100)},
	[]string{"name", "phone"},
)
fmt.Println(df)
Output:

+------------------------------+
|   DataFrame Size:2 x 100    |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Join   | 15963578965 |
| 2     | Join   | 15963578965 |
| 3     | Join   | 15963578965 |
| 4     | Join   | 15963578965 |
| 5     | Join   | 15963578965 |
| 6     | Join   | 15963578965 |
| 7     | Join   | 15963578965 |
| 8     | Join   | 15963578965 |
| 9     | Join   | 15963578965 |
| 10    | Join   | 15963578965 |
| 11    | Join   | 15963578965 |
| 12    | Join   | 15963578965 |
| 13    | Join   | 15963578965 |
| 14    | Join   | 15963578965 |
| 15    | Join   | 15963578965 |
| .     | .      | .           |
| .     | .      | .           |
| .     | .      | .           |
| 95    | Join   | 15963578965 |
| 96    | Join   | 15963578965 |
| 97    | Join   | 15963578965 |
| 98    | Join   | 15963578965 |
| 99    | Join   | 15963578965 |
| 100   | Join   | 15963578965 |
+-------+--------+-------------+
| Types | string | int         |
+-------+--------+-------------+

func ReadCSV

func ReadCSV(filePath string, sheet Sheets) (*DataFrame, error)

ReadCSV 从CSV中读取表格

Example
readCSV, err := ReadCSV("test.csv", Sheets{
	ColsType: []series.Type{series.String, series.Int},
})
if err != nil {
	panic(err)
}
fmt.Println(readCSV)
Output:

func (*DataFrame) AddCol

func (df *DataFrame) AddCol(name string, values any, defaultValue any) error

AddCol 添加列

name:列名。如果已存在更新,否则添加
values:可选 series.Series 或 []E(E 为 int | float64 | bool | string)
defaultValue:当 values 长度不足时,用该值自动补齐
Example
df, _ := New(
	[]any{data.CreateSlice("Join", 5), data.CreateSlice(15963578965, 5)},
	[]string{"name", "phone"},
)
_ = df.AddCol("addr", data.CreateSlice("xxxxx", 5), nil)
fmt.Println(df)
_ = df.AddCol("addr", data.CreateSlice("yyyy", 1), "yyy")
fmt.Println(df)
Output:

+---------------------------------------+
|         DataFrame Size:3 x 5         |
+-------+--------+-------------+--------+
| Index | name   | phone       | addr   |
+-------+--------+-------------+--------+
| 1     | Join   | 15963578965 | xxxxx  |
| 2     | Join   | 15963578965 | xxxxx  |
| 3     | Join   | 15963578965 | xxxxx  |
| 4     | Join   | 15963578965 | xxxxx  |
| 5     | Join   | 15963578965 | xxxxx  |
+-------+--------+-------------+--------+
| Types | string | int         | string |
+-------+--------+-------------+--------+

+---------------------------------------+
|         DataFrame Size:3 x 5         |
+-------+--------+-------------+--------+
| Index | name   | phone       | addr   |
+-------+--------+-------------+--------+
| 1     | Join   | 15963578965 | yyyy   |
| 2     | Join   | 15963578965 | yyy    |
| 3     | Join   | 15963578965 | yyy    |
| 4     | Join   | 15963578965 | yyy    |
| 5     | Join   | 15963578965 | yyy    |
+-------+--------+-------------+--------+
| Types | string | int         | string |
+-------+--------+-------------+--------+

func (*DataFrame) AddRows

func (df *DataFrame) AddRows(values [][]any) error

AddRows 向列表末尾添加行

Example
df, _ := New(
	[]any{data.CreateSlice("Join", 5), data.CreateSlice(15963578965, 5)},
	[]string{"name", "phone"},
)
_ = df.AddRows([][]any{{"name1", 12345678}, {"name1", 12345678}, {"name1", 12345678}})
fmt.Println(df)
Output:

+------------------------------+
|    DataFrame Size:2 x 8     |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Join   | 15963578965 |
| 2     | Join   | 15963578965 |
| 3     | Join   | 15963578965 |
| 4     | Join   | 15963578965 |
| 5     | Join   | 15963578965 |
| 6     | name1  | 12345678    |
| 7     | name1  | 12345678    |
| 8     | name1  | 12345678    |
+-------+--------+-------------+
| Types | string | int         |
+-------+--------+-------------+

func (*DataFrame) Arrange

func (df *DataFrame) Arrange(order ...Order) error

Arrange 排序

Example
df, _ := New(
	[]any{
		[]string{"伏旭歆", "管原炳", "仰芝凤", "万茵瑾", "左芊筱", "俞淑允", "宗茹淳", "卓虹", "司丽瑾", "岑泳继"},
		[]int{13935531105, 15665203778, 14583084372, 14779318181, 17606363473, 18950385204, 18659058185, 16628908658, 17590257481, 17254554855},
		[]int{35, 36, 42, 13, 20, 20, 14, 20, 30, 36},
	},
	[]string{"name", "phone", "age"},
)
_ = df.Arrange(SortByForward("name"), Order{ColumnName: "age", Reverse: true})
fmt.Println(df)
Output:

+------------------------------------+
|       DataFrame Size:3 x 10       |
+-------+--------+-------------+-----+
| Index | name   | phone       | age |
+-------+--------+-------------+-----+
| 1     | 仰芝凤 | 14583084372 | 42  |
| 2     | 岑泳继 | 17254554855 | 36  |
| 3     | 管原炳 | 15665203778 | 36  |
| 4     | 伏旭歆 | 13935531105 | 35  |
| 5     | 司丽瑾 | 17590257481 | 30  |
| 6     | 卓虹   | 16628908658 | 20  |
| 7     | 左芊筱 | 17606363473 | 20  |
| 8     | 俞淑允 | 18950385204 | 20  |
| 9     | 宗茹淳 | 18659058185 | 14  |
| 10    | 万茵瑾 | 14779318181 | 13  |
+-------+--------+-------------+-----+
| Types | string | int         | int |
+-------+--------+-------------+-----+

func (*DataFrame) Cell

func (df *DataFrame) Cell(r int, name string) series.Element

Cell 返回指定单元格元素

func (*DataFrame) Columns

func (df *DataFrame) Columns(name string) (series.Series, error)

Columns 返回列

func (*DataFrame) Concat

func (df *DataFrame) Concat(x DataFrame, isColumn bool) error

Concat 合并两个表

isColumn:是否合并在右侧,如果两个表列名相同,则更新原表列
Example
df, _ := New(
	[]any{data.CreateSlice("Join", 3), data.CreateSlice(15963578965, 3)},
	[]string{"name", "phone"},
)
df1, _ := New(
	[]any{data.CreateSlice("Mary", 3), data.CreateSlice(19645698705, 3)},
	[]string{"name", "phone"},
)
_ = df.Concat(*df1, false)
fmt.Println(df)
Output:

+------------------------------+
|    DataFrame Size:2 x 6     |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Join   | 15963578965 |
| 2     | Join   | 15963578965 |
| 3     | Join   | 15963578965 |
| 4     | Mary   | 19645698705 |
| 5     | Mary   | 19645698705 |
| 6     | Mary   | 19645698705 |
+-------+--------+-------------+
| Types | string | int         |
+-------+--------+-------------+

func (*DataFrame) Copy

func (df *DataFrame) Copy() *DataFrame

Copy 复制

func (*DataFrame) DropCols

func (df *DataFrame) DropCols(names ...string)

DropCols 批量删除

func (*DataFrame) Filter

func (df *DataFrame) Filter(filters ...F) (*DataFrame, error)

Filter 过滤

func (*DataFrame) FormatCols

func (df *DataFrame) FormatCols(f func(index int, elem series.Element) series.Element, cols ...string) error
Example
df, _ := New(
	[]any{data.CreateSlice("Join", 5), data.CreateSlice(15963578965, 5)},
	[]string{"name", "phone"},
)
err := df.FormatCols(func(index int, elem series.Element) series.Element {
	elem.Set(elem.Records() + strconv.Itoa(index))
	return elem
}, "name")
if err != nil {
	panic(err)
}
fmt.Println(df)
Output:

+------------------------------+
|    DataFrame Size:2 x 5     |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Join0  | 15963578965 |
| 2     | Join1  | 15963578965 |
| 3     | Join2  | 15963578965 |
| 4     | Join3  | 15963578965 |
| 5     | Join4  | 15963578965 |
+-------+--------+-------------+
| Types | string | int         |
+-------+--------+-------------+

func (*DataFrame) Groups

func (df *DataFrame) Groups(names ...string) (map[string]*DataFrame, error)
Example
df, _ := New(
	[]any{
		[]string{"喻靖元", "尤淇方", "方文栋", "郝晨轩", "养海露", "弘展鹏", "滕安平", "谷灵雁", "陶海露", "乔瀚天"},
		[]string{"男", "男", "男", "男", "女", "男", "男", "女", "女", "男"},
		[]int{51, 29, 44, 21, 26, 29, 68, 21, 29, 52},
	},
	[]string{"name", "sex", "age"},
)
group, _ := df.Groups("sex")
for s, frame := range group {
	fmt.Println("group_name is " + s)
	fmt.Println(frame)
}
Output:

group_name is 男
+-------------------------------+
|     DataFrame Size:3 x 7     |
+-------+--------+--------+-----+
| Index | name   | sex    | age |
+-------+--------+--------+-----+
| 1     | 喻靖元 | 男     | 51  |
| 2     | 尤淇方 | 男     | 29  |
| 3     | 方文栋 | 男     | 44  |
| 4     | 郝晨轩 | 男     | 21  |
| 5     | 弘展鹏 | 男     | 29  |
| 6     | 滕安平 | 男     | 68  |
| 7     | 乔瀚天 | 男     | 52  |
+-------+--------+--------+-----+
| Types | string | string | int |
+-------+--------+--------+-----+

group_name is 女
+-------------------------------+
|     DataFrame Size:3 x 3     |
+-------+--------+--------+-----+
| Index | name   | sex    | age |
+-------+--------+--------+-----+
| 1     | 养海露 | 女     | 26  |
| 2     | 谷灵雁 | 女     | 21  |
| 3     | 陶海露 | 女     | 29  |
+-------+--------+--------+-----+
| Types | string | string | int |
+-------+--------+--------+-----+
Example (Two)
df, _ := New(
	[]any{
		[]string{"喻靖元", "尤淇方", "方文栋", "郝晨轩", "养海露", "弘展鹏", "滕安平", "谷灵雁", "陶海露", "乔瀚天"},
		[]string{"男", "男", "男", "男", "女", "男", "男", "女", "女", "男"},
		[]int{51, 29, 44, 21, 26, 29, 68, 21, 29, 52},
	},
	[]string{"name", "sex", "age"},
)
group, _ := df.Groups("sex", "age")
for s, frame := range group {
	fmt.Println("group_name is " + s)
	fmt.Println(frame)
}
Output:

group_name is 男51
+-------------------------------+
|     DataFrame Size:3 x 1     |
+-------+--------+--------+-----+
| Index | name   | sex    | age |
+-------+--------+--------+-----+
| 1     | 喻靖元 | 男     | 51  |
+-------+--------+--------+-----+
| Types | string | string | int |
+-------+--------+--------+-----+

	......

group_name is 男29
+-------------------------------+
|     DataFrame Size:3 x 2     |
+-------+--------+--------+-----+
| Index | name   | sex    | age |
+-------+--------+--------+-----+
| 1     | 尤淇方 | 男     | 29  |
| 2     | 弘展鹏 | 男     | 29  |
+-------+--------+--------+-----+
| Types | string | string | int |
+-------+--------+--------+-----+

group_name is 男21
+-------------------------------+
|     DataFrame Size:3 x 1     |
+-------+--------+--------+-----+
| Index | name   | sex    | age |
+-------+--------+--------+-----+
| 1     | 郝晨轩 | 男     | 21  |
+-------+--------+--------+-----+
| Types | string | string | int |
+-------+--------+--------+-----+

func (*DataFrame) NCols

func (df *DataFrame) NCols() int

func (*DataFrame) NRows

func (df *DataFrame) NRows() int

func (*DataFrame) Names

func (df *DataFrame) Names() []string

Names 返回列名

func (*DataFrame) Print

func (df *DataFrame) Print(isComplete bool) (str string)

func (*DataFrame) Records

func (df *DataFrame) Records(isRow bool, hasColName bool) [][]string

Records 返回字符串切片

isRow = false 返回列切片,isRow = true 返回行切片
hasColName 是否返回列名:返回列切片时列名是每个切片的第一个元素,返回行切片时列名是第一个切片
Example
df, _ := New(
	[]any{data.CreateSlice("Join", 5), data.CreateSlice(15963578965, 5)},
	[]string{"name", "phone"},
)
fmt.Println(df.Records(false, false))
fmt.Println(df.Records(false, true))
fmt.Println(df.Records(true, true))
Output:

[[Join ... Join] [15963578965 ... 15963578965]]
[[name Join ... Join] [phone 15963578965 ... 15963578965]]
[[name phone] [Join 15963578965] ... [Join 15963578965]]

func (*DataFrame) Rename

func (df *DataFrame) Rename(cols map[string]string)

Rename 批量命名

func (*DataFrame) Rows

func (df *DataFrame) Rows(r int) map[string]series.Element

Rows 返回行

func (*DataFrame) SelectCols

func (df *DataFrame) SelectCols(names ...string) *DataFrame

func (*DataFrame) Set

func (df *DataFrame) Set(index int, values any) error

Set 设置 index 行的值

values:可选 []any、map[string]any,使用 []any 时会更改该行的所有元素
index < DataFrame.rows 更新行,index >= DataFrame.rows 添加行
Example
df, _ := New(
	[]any{data.CreateSlice("Join", 5), data.CreateSlice(15963578965, 5)},
	[]string{"name", "phone"},
)
_ = df.Set(0, []any{"Andy", 1111111111})
fmt.Println(df)
_ = df.Set(2, map[string]any{"phone": 222222222})
fmt.Println(df)
_ = df.Set(df.rows, []any{"Andy", 1111111111})
fmt.Println(df)
Output:

+------------------------------+
|    DataFrame Size:2 x 5     |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Andy   | 1111111111  |
| 2     | Join   | 15963578965 |
| 3     | Join   | 15963578965 |
| 4     | Join   | 15963578965 |
| 5     | Join   | 15963578965 |
+-------+--------+-------------+
| Types | string | int         |
+-------+--------+-------------+

+------------------------------+
|    DataFrame Size:2 x 5     |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Andy   | 1111111111  |
| 2     | Join   | 15963578965 |
| 3     | Join   | 222222222   |
| 4     | Join   | 15963578965 |
| 5     | Join   | 15963578965 |
+-------+--------+-------------+
| Types | string | int         |
+-------+--------+-------------+

+------------------------------+
|    DataFrame Size:2 x 6     |
+-------+--------+-------------+
| Index | name   | phone       |
+-------+--------+-------------+
| 1     | Andy   | 1111111111  |
| 2     | Join   | 15963578965 |
| 3     | Join   | 222222222   |
| 4     | Join   | 15963578965 |
| 5     | Join   | 15963578965 |
| 6     | Andy   | 1111111111  |
+-------+--------+-------------+
| Types | string | int         |
+-------+--------+-------------+

func (*DataFrame) Size

func (df *DataFrame) Size() (cols, rows int)

Size 更新并返回二维数组大小

func (*DataFrame) String

func (df *DataFrame) String() string

自定义输出

func (*DataFrame) SubSet

func (df *DataFrame) SubSet(indexes ...int) (*DataFrame, error)

func (*DataFrame) Types

func (df *DataFrame) Types() []string

Types 返回列类型

func (*DataFrame) WriteToCSV

func (df *DataFrame) WriteToCSV(p string) error
Example
df, _ := New(
	[]any{data.CreateSlice("Join", 100), data.CreateSlice(15963578965, 100)},
	[]string{"name", "phone"},
)
err := df.WriteToCSV("test.csv")
if err != nil {
	panic(err)
}
Output:

func (*DataFrame) WriteToXLSX

func (df *DataFrame) WriteToXLSX(p, sheetName string) error
Example
df, _ := New(
	[]any{data.CreateSlice("Join", 100), data.CreateSlice(15963578965, 100)},
	[]string{"name", "phone"},
)
err := df.WriteToXLSX("test.xlsx", "Sheet3")
if err != nil {
	panic(err)
}
Output:

type F

type F struct {
	Column   string
	Operator series.RelationalOperator

	OR bool
	// contains filtered or unexported fields
}

F 过滤条件

OR与前一个过滤的关系

type Order

type Order struct {
	// 列名
	ColumnName string
	// 倒序
	Reverse bool
}

Order 排序结构

func SortByForward

func SortByForward(name string) Order

SortByForward 正序

func SortByReverse

func SortByReverse(name string) Order

SortByReverse 倒序

type Sheets

type Sheets struct {
	SCol      int           // 开始列, 默认 1
	SRow      int           // 开始行, 默认 1
	ECol      int           // 结束列, 默认最后一列
	ERow      int           // 结束行, 默认最后一行
	Header    []string      // 表头,默认第一行
	SheetName string        // 工作表名 XLSX 特有
	ColsType  []series.Type // 列类型
}

Sheets 数据对象

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL