semanticscholar

package

v0.0.0-...-162f227 Latest Latest Go to latest Published: Nov 5, 2024 License: MIT Imports: 15 Imported by: 0

Documentation ¶

Index ¶

Variables
func CleanString(dirty string) string
func DefaultPublicationFields(pub *Publication) (result []string)
func DefaultPublicationHeaders() (result []string)
func Download(destPath string, ApiKey string) (err error)
func GetReleaseIds(baseURL string, ApiKey string) (responseData []string, err error)
func MakeRequest(URL string, ApiKey string) (response []byte, err error)
type Datasets
type DownloadLinks
type ETL
type Publication

Constants ¶

This section is empty.

Variables ¶

View Source

// Author2PublicationEdgesHeader lists the column names for author-to-publication
// edge rows: the author ID, the publication ID, and the edge type.
// NOTE(review): presumably written as the first row of the plain (non-Neo4j)
// edge CSV export — confirm against the export code, which is not shown here.
var Author2PublicationEdgesHeader = []string{
	"authorId",
	"publicationId",
	"type",
}

View Source

// AuthorNodesHeader lists the column names for author node rows: the author
// ID followed by the author's name.
// NOTE(review): presumably the header row of the plain author CSV export —
// confirm against the export code.
var AuthorNodesHeader = []string{
	"authorId",
	"name",
}

View Source

// FieldOfStudyNodesHeader lists the single column name used for field-of-study
// node rows: the field-of-study ID.
// NOTE(review): presumably the header row of the plain field-of-study CSV
// export — confirm against the export code.
var FieldOfStudyNodesHeader = []string{
	"fieldOfStudyId",
}

View Source

// InCitationEdgesHeader lists the column names for incoming-citation edge
// rows: start publication ID, end publication ID, and edge type.
// It has the same columns as OutCitationEdgesHeader.
// NOTE(review): which publication is "start" and which is "end" for an
// in-citation is not visible here — confirm against the export code.
var InCitationEdgesHeader = []string{
	"publicationIdStart",
	"publicationIdEnd",
	"type",
}

View Source

// Neo4jAuthor2PublicationEdgesHeader is the author-to-publication edge header
// written with :START_ID / :END_ID / :TYPE columns. The start column refers to
// the "Author-ID" ID space and the end column to the "Publication-ID" ID space.
// NOTE(review): this matches the column syntax of Neo4j's bulk CSV import —
// confirm that is the intended consumer.
var Neo4jAuthor2PublicationEdgesHeader = []string{
	":START_ID(Author-ID)",
	":END_ID(Publication-ID)",
	":TYPE",
}

View Source

// Neo4jAuthorNodesHeader is the author node header in which the authorId
// column is declared as the node ID within the "Author-ID" ID space, followed
// by a plain name column.
// NOTE(review): presumably consumed by Neo4j's bulk CSV import — confirm.
var Neo4jAuthorNodesHeader = []string{
	"authorId:ID(Author-ID)",
	"name",
}

View Source

// Neo4jFieldOfStudyNodesHeader is the field-of-study node header: a single
// fieldOfStudyId column declared as the node ID within the
// "Field-Of-Study-ID" ID space.
// NOTE(review): presumably consumed by Neo4j's bulk CSV import — confirm.
var Neo4jFieldOfStudyNodesHeader = []string{
	"fieldOfStudyId:ID(Field-Of-Study-ID)",
}

View Source

// Neo4jInCitationEdgesHeader is the incoming-citation edge header written with
// :START_ID / :END_ID / :TYPE columns; both endpoints refer to the
// "Publication-ID" ID space. Its contents are identical to
// Neo4jOutCitationEdgesHeader.
// NOTE(review): presumably consumed by Neo4j's bulk CSV import — confirm.
var Neo4jInCitationEdgesHeader = []string{
	":START_ID(Publication-ID)",
	":END_ID(Publication-ID)",
	":TYPE",
}

View Source

// Neo4jOutCitationEdgesHeader is the outgoing-citation edge header written
// with :START_ID / :END_ID / :TYPE columns; both endpoints refer to the
// "Publication-ID" ID space. Its contents are identical to
// Neo4jInCitationEdgesHeader.
// NOTE(review): presumably consumed by Neo4j's bulk CSV import — confirm.
var Neo4jOutCitationEdgesHeader = []string{
	":START_ID(Publication-ID)",
	":END_ID(Publication-ID)",
	":TYPE",
}

View Source

// Neo4jPublication2FieldsOfStudyEdgesHeader is the publication-to-field-of-study
// edge header written with :START_ID / :END_ID / :TYPE columns. The start
// column refers to the "Publication-ID" ID space and the end column to the
// "Field-Of-Study-ID" ID space.
// NOTE(review): presumably consumed by Neo4j's bulk CSV import — confirm.
var Neo4jPublication2FieldsOfStudyEdgesHeader = []string{
	":START_ID(Publication-ID)",
	":END_ID(Field-Of-Study-ID)",
	":TYPE",
}

View Source

// Neo4jPublicationNodesHeader is the publication node header. The first
// column declares publicationId as the node ID within the "Publication-ID"
// ID space, and the year column carries an ":int" type suffix; all remaining
// columns are plain names. Apart from the first column it is identical to
// PublicationNodesHeader.
// NOTE(review): the column order presumably has to match the field order
// produced by the publication field handler — confirm against
// DefaultPublicationFields.
var Neo4jPublicationNodesHeader = []string{
	"publicationId:ID(Publication-ID)",
	"title",
	"paperAbstract",
	"s2url",
	"sources",
	"pdfUrls",
	"year:int",
	"venue",
	"journalName",
	"journalVolume",
	"journalPages",
	"doi",
	"doiUrl",
	"pmId",
	"magId",
}

View Source

// OutCitationEdgesHeader lists the column names for outgoing-citation edge
// rows: start publication ID, end publication ID, and edge type.
// It has the same columns as InCitationEdgesHeader.
// NOTE(review): presumably the header row of the plain out-citation CSV
// export — confirm against the export code.
var OutCitationEdgesHeader = []string{
	"publicationIdStart",
	"publicationIdEnd",
	"type",
}

View Source

// Publication2FieldsOfStudyEdgesHeader lists the column names for
// publication-to-field-of-study edge rows: the publication ID, the
// field-of-study ID, and the edge type.
// NOTE(review): presumably the header row of the plain edge CSV export —
// confirm against the export code.
var Publication2FieldsOfStudyEdgesHeader = []string{
	"publicationId",
	"fieldOfStudyId",
	"type",
}

View Source

// PublicationNodesHeader lists the column names for publication node rows.
// It is identical to Neo4jPublicationNodesHeader except that the first column
// is the plain name "publicationId" without an ID-space declaration.
// NOTE(review): the year column is still named "year:int" here, i.e. it keeps
// the typed-column suffix used in the Neo4j variant even though every other
// column in this plain header is untyped — confirm whether this is intended
// or a copy-paste leftover before changing it, since consumers may depend on
// the exact column name.
var PublicationNodesHeader = []string{
	"publicationId",
	"title",
	"paperAbstract",
	"s2url",
	"sources",
	"pdfUrls",
	"year:int",
	"venue",
	"journalName",
	"journalVolume",
	"journalPages",
	"doi",
	"doiUrl",
	"pmId",
	"magId",
}

Functions ¶

func CleanString ¶

func CleanString(dirty string) string

CleanString repairs artifacts that occur in the dataset, e.g. in German umlauts.

func DefaultPublicationFields ¶

func DefaultPublicationFields(pub *Publication) (result []string)

func DefaultPublicationHeaders ¶

func DefaultPublicationHeaders() (result []string)

func Download ¶

func Download(destPath string, ApiKey string) (err error)

func GetReleaseIds ¶

func GetReleaseIds(baseURL string, ApiKey string) (responseData []string, err error)

func MakeRequest ¶

func MakeRequest(URL string, ApiKey string) (response []byte, err error)

Types ¶

type Datasets ¶

// Datasets describes one release: its release ID, a README text, and a list
// of dataset entries, each with a name, a description and its own README.
// The struct tags give the JSON keys each field is decoded from / encoded to.
// NOTE(review): presumably the decoded body of the Semantic Scholar datasets
// release endpoint — confirm against the request code, which is not shown.
type Datasets struct {
	ReleaseID string `json:"release_id"`
	Readme    string `json:"README"`
	Datasets  []struct {
		Name        string `json:"name"`
		Description string `json:"description"`
		Readme      string `json:"README"`
	} `json:"datasets"`
}

type DownloadLinks ¶

// DownloadLinks describes a single dataset by name, description and README,
// together with a list of strings stored under the JSON key "files".
// NOTE(review): Files presumably holds the download URLs of the dataset's
// partitions — confirm against Download.
type DownloadLinks struct {
	Name        string   `json:"name"`
	Description string   `json:"description"`
	Readme      string   `json:"README"`
	Files       []string `json:"files"`
}

type ETL ¶

// ETL bundles the settings for a transform/export run: the import and export
// directories, output flags (Compress, Combined, AddHeaders), one Include*
// toggle per node and edge kind, and two optional callbacks that produce the
// publication row fields and the publication header.
//
// The callbacks can be set through AddPublicationFieldHandler and
// AddPublicationHeaderHandler; according to the documentation of
// CheckDefaultHandlers, default handlers are used when none are in place.
// NOTE(review): the exact effect of Compress, Combined and AddHeaders on the
// written files is not visible here — confirm against ExportCsv and
// TransformDirectory.
type ETL struct {
	ImportDirectory                     string
	ExportDirectory                     string
	Compress                            bool
	Combined                            bool
	AddHeaders                          bool
	IncludePublications                 bool
	IncludeAuthors                      bool
	IncludeFieldOfStudies               bool
	IncludeAuthorPublicationEdges       bool
	IncludePublicationFieldOfStudyEdges bool
	IncludeInCitationEdges              bool
	IncludeOutCitationEdges             bool
	PublicationFieldHandler             func(pub *Publication) []string
	PublicationHeaderHandler            func() []string
}

func (*ETL) AddPublicationFieldHandler ¶

func (e *ETL) AddPublicationFieldHandler(fn func(pub *Publication) []string) *ETL

func (*ETL) AddPublicationHeaderHandler ¶

func (e *ETL) AddPublicationHeaderHandler(fn func() []string) *ETL

func (*ETL) AppendFile ¶

func (e *ETL) AppendFile(data [][]string, filePath string) (err error)

AppendFile appends the given content to the file at filePath.

func (*ETL) CheckDefaultHandlers ¶

func (e *ETL) CheckDefaultHandlers()

CheckDefaultHandlers checks whether handlers for the publications are in place; otherwise, it uses the default handlers.

func (*ETL) ExportAppendCsv ¶

func (e *ETL) ExportAppendCsv(i int, publications []*Publication, prefix, suffix string) (err error)

ExportAppendCsv transforms the data and stores it in a (compressed) csv file

func (*ETL) ExportCsv ¶

func (e *ETL) ExportCsv(i int, gzip, addHeaders bool, onlyHeaders bool, publications []*Publication, prefix, suffix string) (err error)

ExportCsv transforms the data and stores it in a (compressed) csv file

func (*ETL) TransformDirectory ¶

func (e *ETL) TransformDirectory() (err error)

func (*ETL) WriteFile ¶

func (e *ETL) WriteFile(gzip bool, data [][]string, filePath string) (err error)

type Publication ¶

// Publication is a single research paper record. Each field's struct tag
// names the JSON key it is decoded from, and the trailing comment on each
// field describes its content. The authors are held as an inline list of
// name/ID-list pairs; in- and out-citations are lists of S2 paper IDs.
type Publication struct {
	ID            string `json:"id"`            // S2 generated research paper ID
	Title         string `json:"title"`         // Research paper title
	PaperAbstract string `json:"paperAbstract"` // Extracted abstract of the paper
	// Entities      []string `json:"entities"` // Extracted entities (deprecated on 2019-09-17)
	FieldsOfStudy []string `json:"fieldsOfStudy"` // Zero or more fields of study this paper addresses
	S2URL         string   `json:"s2Url"`         // URL to S2 research paper details page
	PdfUrls       []string `json:"pdfUrls"`       // URLs related to this PDF scraped from the web
	Authors       []struct {
		Name string   `json:"name"` // Name of the author
		IDs  []string `json:"ids"`  // S2ID of the author
	} `json:"authors"` // List of authors with an S2 generated author ID and name
	InCitations   []string `json:"inCitations"`   // List of S2 paper IDs which cited this paper
	OutCitations  []string `json:"outCitations"`  // List of S2 paper IDs which this paper cited
	Year          int      `json:"year"`          // Year this paper was published as integer
	Venue         string   `json:"venue"`         // Extracted publication venue for this paper
	JournalName   string   `json:"journalName"`   // Name of the journal that published this paper
	JournalVolume string   `json:"journalVolume"` // The volume of the journal where this paper was published
	JournalPages  string   `json:"journalPages"`  // The pages of the journal where this paper was published
	Sources       []string `json:"sources"`       // Identifies papers sourced from DBLP or Medline
	Doi           string   `json:"doi"`           // Digital Object Identifier registered at doi.org
	DoiURL        string   `json:"doiUrl"`        // DOI link for registered objects
	PmID          string   `json:"pmid"`          // Unique identifier used by PubMed
	MagID         string   `json:"magId"`         // Unique identifier used by Microsoft Academic Graph
}

func ParseFile ¶

func ParseFile(fileName string) (results []*Publication, err error)

ParseFile takes a file name, reads the data from within the file, and returns a slice of parsed Publications. It also checks whether the file is in a compressed format such as .gz.

func ParseLine ¶

func ParseLine(line []byte) (data Publication, err error)

ParseLine takes a line in byte form and returns a parsed Publication.

func ReadFromDirectory ¶

func ReadFromDirectory(directoryPath string) (results []*Publication, err error)

ReadFromDirectory parses the directory of separate files provided by Semantic Scholar.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL