Documentation ¶
Index ¶
- Variables
- func CleanString(dirty string) string
- func DefaultPublicationFields(pub *Publication) (result []string)
- func DefaultPublicationHeaders() (result []string)
- func Download(destPath string, ApiKey string) (err error)
- func GetReleaseIds(baseURL string, ApiKey string) (responseData []string, err error)
- func MakeRequest(URL string, ApiKey string) (response []byte, err error)
- type Datasets
- type DownloadLinks
- type ETL
- func (e *ETL) AddPublicationFieldHandler(fn func(pub *Publication) []string) *ETL
- func (e *ETL) AddPublicationHeaderHandler(fn func() []string) *ETL
- func (e *ETL) AppendFile(data [][]string, filePath string) (err error)
- func (e *ETL) CheckDefaultHandlers()
- func (e *ETL) ExportAppendCsv(i int, publications []*Publication, prefix, suffix string) (err error)
- func (e *ETL) ExportCsv(i int, gzip, addHeaders bool, onlyHeaders bool, publications []*Publication, ...) (err error)
- func (e *ETL) TransformDirectory() (err error)
- func (e *ETL) WriteFile(gzip bool, data [][]string, filePath string) (err error)
- type Publication
Constants ¶
This section is empty.
Variables ¶
var Author2PublicationEdgesHeader = []string{
"authorId",
"publicationId",
"type",
}
var AuthorNodesHeader = []string{
"authorId",
"name",
}
var FieldOfStudyNodesHeader = []string{
"fieldOfStudyId",
}
var InCitationEdgesHeader = []string{
"publicationIdStart",
"publicationIdEnd",
"type",
}
var Neo4jAuthor2PublicationEdgesHeader = []string{
":START_ID(Author-ID)",
":END_ID(Publication-ID)",
":TYPE",
}
var Neo4jAuthorNodesHeader = []string{
"authorId:ID(Author-ID)",
"name",
}
var Neo4jFieldOfStudyNodesHeader = []string{
"fieldOfStudyId:ID(Field-Of-Study-ID)",
}
var Neo4jInCitationEdgesHeader = []string{
":START_ID(Publication-ID)",
":END_ID(Publication-ID)",
":TYPE",
}
var Neo4jOutCitationEdgesHeader = []string{
":START_ID(Publication-ID)",
":END_ID(Publication-ID)",
":TYPE",
}
var Neo4jPublication2FieldsOfStudyEdgesHeader = []string{
":START_ID(Publication-ID)",
":END_ID(Field-Of-Study-ID)",
":TYPE",
}
var Neo4jPublicationNodesHeader = []string{
"publicationId:ID(Publication-ID)",
"title",
"paperAbstract",
"s2url",
"sources",
"pdfUrls",
"year:int",
"venue",
"journalName",
"journalVolume",
"journalPages",
"doi",
"doiUrl",
"pmId",
"magId",
}
var OutCitationEdgesHeader = []string{
"publicationIdStart",
"publicationIdEnd",
"type",
}
var Publication2FieldsOfStudyEdgesHeader = []string{
"publicationId",
"fieldOfStudyId",
"type",
}
var PublicationNodesHeader = []string{
"publicationId",
"title",
"paperAbstract",
"s2url",
"sources",
"pdfUrls",
"year:int",
"venue",
"journalName",
"journalVolume",
"journalPages",
"doi",
"doiUrl",
"pmId",
"magId",
}
Functions ¶
func CleanString ¶
CleanString repairs artifacts that are in the dataset, e.g. German umlauts.
func DefaultPublicationFields ¶
func DefaultPublicationFields(pub *Publication) (result []string)
func DefaultPublicationHeaders ¶
func DefaultPublicationHeaders() (result []string)
func GetReleaseIds ¶
Types ¶
type DownloadLinks ¶
type ETL ¶
type ETL struct { ImportDirectory string ExportDirectory string Compress bool Combined bool AddHeaders bool IncludePublications bool IncludeAuthors bool IncludeFieldOfStudies bool IncludeAuthorPublicationEdges bool IncludePublicationFieldOfStudyEdges bool IncludeInCitationEdges bool IncludeOutCitationEdges bool PublicationFieldHandler func(pub *Publication) []string PublicationHeaderHandler func() []string }
func (*ETL) AddPublicationFieldHandler ¶
func (e *ETL) AddPublicationFieldHandler(fn func(pub *Publication) []string) *ETL
func (*ETL) AddPublicationHeaderHandler ¶
func (*ETL) AppendFile ¶
AppendFile appends the given data to the file at filePath.
func (*ETL) CheckDefaultHandlers ¶
func (e *ETL) CheckDefaultHandlers()
CheckDefaultHandlers checks whether handlers for the publications are in place; otherwise, it uses the default handlers.
func (*ETL) ExportAppendCsv ¶
func (e *ETL) ExportAppendCsv(i int, publications []*Publication, prefix, suffix string) (err error)
ExportAppendCsv transforms the data and stores it in a (compressed) csv file
func (*ETL) ExportCsv ¶
func (e *ETL) ExportCsv(i int, gzip, addHeaders bool, onlyHeaders bool, publications []*Publication, prefix, suffix string) (err error)
ExportCsv transforms the data and stores it in a (compressed) csv file
func (*ETL) TransformDirectory ¶
type Publication ¶
type Publication struct { ID string `json:"id"` // S2 generated research paper ID Title string `json:"title"` // Research paper title PaperAbstract string `json:"paperAbstract"` // Extracted abstract of the paper // Entities []string `json:"entities"` // Extracted entities (deprecated on 2019-09-17) FieldsOfStudy []string `json:"fieldsOfStudy"` // Zero or more fields of study this paper addresses S2URL string `json:"s2Url"` // URL to S2 research paper details page PdfUrls []string `json:"pdfUrls"` // URLs related to this PDF scraped from the web Authors []struct { Name string `json:"name"` // Name of the author IDs []string `json:"ids"` // S2ID of the author } `json:"authors"` // List of authors with an S2 generated author ID and name InCitations []string `json:"inCitations"` // List of S2 paper IDs which cited this paper OutCitations []string `json:"outCitations"` // List of S2 paper IDs which this paper cited Year int `json:"year"` // Year this paper was published as integer Venue string `json:"venue"` // Extracted publication venue for this paper JournalName string `json:"journalName"` // Name of the journal that published this paper JournalVolume string `json:"journalVolume"` // The volume of the journal where this paper was published JournalPages string `json:"journalPages"` // The pages of the journal where this paper was published Sources []string `json:"sources"` // Identifies papers sourced from DBLP or Medline Doi string `json:"doi"` // Digital Object Identifier registered at doi.org DoiURL string `json:"doiUrl"` // DOI link for registered objects PmID string `json:"pmid"` // Unique identifier used by PubMed MagID string `json:"magId"` // Unique identifier used by Microsoft Academic Graph }
func ParseFile ¶
func ParseFile(fileName string) (results []*Publication, err error)
ParseFile takes a file name, reads the data from within the file, and returns a slice of parsed Publications. It also checks whether the file is in a compressed format such as .gz.
func ParseLine ¶
func ParseLine(line []byte) (data Publication, err error)
ParseLine takes a line in byte form and returns a parsed Publication.
func ReadFromDirectory ¶
func ReadFromDirectory(directoryPath string) (results []*Publication, err error)
ReadFromDirectory parses the directory of separate files provided by Semantic Scholar.