openalex

package
v0.0.0-...-d63711e Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 5, 2024 License: MIT Imports: 28 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

AllManifestUrls is a list of all manifest URLs

View Source
// ErrStatusNotOK is the sentinel error returned when the status code is not OK.
// Compare against it with errors.Is.
var ErrStatusNotOK = errors.New("status code is not OK")

ErrStatusNotOK is returned when the status code is not OK

View Source
// ErrUnsupportedFileType is the sentinel error returned when the file type is
// not supported. Compare against it with errors.Is.
var ErrUnsupportedFileType = errors.New("unsupported file type")

ErrUnsupportedFileType is returned when the file type is not supported

Functions

func GetFiles

func GetFiles(directoryPath string) (filePaths []string, err error)

GetFiles returns a list of files in a directory

func OrderByMergedIDsLast

func OrderByMergedIDsLast(filePaths []string) []string

OrderByMergedIDsLast sorts the file paths so that the merged ids file is last

func ParseFile

func ParseFile(filePath string, fn ParsedEntityLineHandler, sh *StateHandler) (count int, err error)

ParseFile takes a file name, reads the data from within the file, and parses every line into structs

func ParseMergedIDsFile

func ParseMergedIDsFile(filePath string, fn MergedIdRecordHandler) (err error)

ParseMergedIDsFile parses a CSV file (either plain or gzipped) of merged IDs and passes each parsed record to fn; it returns only an error, not a slice

func PrintEntityHandler

func PrintEntityHandler(fileEntityType FileEntityType, entity any) error

PrintEntityHandler is a function that prints a parsed line of a file

func PrintMergedIdRecordHandler

func PrintMergedIdRecordHandler(fileEntityType FileEntityType, mergedID MergedID) error

func ProcessDirectory

func ProcessDirectory(directoryPath string, fnEntityHandler ParsedEntityLineHandler, fnMergedIdHandler MergedIdRecordHandler, sh *StateHandler) (err error)

ProcessDirectory parses the directory of separated files and processes them

func ProcessFiles

func ProcessFiles(filePaths []string, fnEntityHandler ParsedEntityLineHandler, fnMergedIdHandler MergedIdRecordHandler, sh *StateHandler) (err error)

ProcessFiles parses the files and processes them

func Sync

func Sync(destPath string, sh *StateHandler) (err error)

Sync downloads the latest snapshot from OpenAlex. An "AWS CLI" installation is required; see https://docs.aws.amazonaws.com/cli/latest/userguide/getting-started-install.html. Note that the snapshot is around 422GB compressed and 1.6TB after decompression.

Types

type Affiliation

// Affiliation pairs an institution with a list of years.
//
// Both fields carry explicit lowercase JSON tags so that marshalled output
// uses the keys "years" and "institution", consistent with every other tagged
// struct in this package. Decoding is unaffected: field matching was already
// case-insensitive for the untagged field.
type Affiliation struct {
	// Years is the list of years attached to this affiliation.
	Years []int `json:"years"`
	// Institution is the affiliated institution.
	Institution AuthorInstitution `json:"institution"`
}

type Author

// Author is a struct that represents the data of an author of OpenAlex.
// Every field is mapped to its snake_case JSON key through a struct tag.
type Author struct {
	ID                      string               `json:"id"`
	CitedByCount            int                  `json:"cited_by_count"`
	// CountsByYear holds one AuthorCountsByYear entry per year.
	CountsByYear            []AuthorCountsByYear `json:"counts_by_year"`
	// CreatedDate is kept as a raw string; no date parsing happens in this struct.
	CreatedDate             string               `json:"created_date"`
	DisplayName             string               `json:"display_name"`
	DisplayNameAlternatives []string             `json:"display_name_alternatives"`
	// Ids groups the author's identifiers (see AuthorIDs).
	Ids                     AuthorIDs            `json:"ids"`
	LastKnownInstitution    AuthorInstitution    `json:"last_known_institution"`
	MostCitedWork           string               `json:"most_cited_work"`
	Orcid                   string               `json:"orcid"`
	SummaryStats            AuthorSummaryStats   `json:"summary_stats"`
	// UpdatedDate is kept as a raw string, like CreatedDate.
	UpdatedDate             string               `json:"updated_date"`
	WorksAPIURL             string               `json:"works_api_url"`
	WorksCount              int                  `json:"works_count"`
	XConcepts               []AuthorXConcept     `json:"x_concepts"`
	Affiliations            []Affiliation        `json:"affiliations"` // this is available via the api
}

Author is a struct that represents the data of an author of OpenAlex

func (*Author) GetID

func (a *Author) GetID() string

GetID returns the ID of the author

func (*Author) GetType

func (a *Author) GetType() string

GetType returns the entity type

type AuthorCountsByYear

// AuthorCountsByYear holds the works count and cited-by count for one year.
// It is the element type of Author.CountsByYear.
type AuthorCountsByYear struct {
	Year         int `json:"year"`
	WorksCount   int `json:"works_count"`
	CitedByCount int `json:"cited_by_count"`
}

type AuthorIDs

// AuthorIDs groups the identifiers of an author as plain strings.
// It is the type of Author.Ids.
type AuthorIDs struct {
	Openalex  string `json:"openalex"`
	Orcid     string `json:"orcid"`
	Scopus    string `json:"scopus"`
	Wikipedia string `json:"wikipedia"`
}

type AuthorInstitution

// AuthorInstitution is the compact institution record embedded in author data.
// It is used by Author.LastKnownInstitution and Affiliation.Institution.
type AuthorInstitution struct {
	CountryCode string `json:"country_code"`
	DisplayName string `json:"display_name"`
	ID          string `json:"id"`
	Ror         string `json:"ror"`
	Type        string `json:"type"`
}

type AuthorSummaryStats

// AuthorSummaryStats holds the summary statistics of an author.
// It is the type of Author.SummaryStats.
//
// The JSON keys of the two-year fields start with a digit ("2yr_..."), so
// those fields additionally carry a graphql tag with a "two_year_" prefix.
type AuthorSummaryStats struct {
	CitedByCount2yr  int     `json:"2yr_cited_by_count" graphql:"two_year_cited_by_count"`
	HIndex2yr        int     `json:"2yr_h_index" graphql:"two_year_h_index"`
	I10Index2yr      int     `json:"2yr_i10_index" graphql:"two_year_i10_index"`
	MeanCitedness2yr float64 `json:"2yr_mean_citedness" graphql:"two_year_mean_citedness"`
	WorksCount2yr    int     `json:"2yr_works_count" graphql:"two_year_works_count"`
	CitedByCount     int     `json:"cited_by_count"`
	HIndex           int     `json:"h_index"`
	I10Index         int     `json:"i10_index"`
	OaPercent        float64 `json:"oa_percent"`
	WorksCount       int     `json:"works_count"`
}

type AuthorXConcept

// AuthorXConcept is one entry of Author.XConcepts.
type AuthorXConcept struct {
	DisplayName string  `json:"display_name"`
	ID          string  `json:"id"`
	// NOTE(review): Level is float64 here, while the x_concepts entries of
	// Institution, Publisher and Source declare Level as int — confirm the
	// difference is intentional.
	Level       float64 `json:"level"`
	Score       float64 `json:"score"`
	Wikidata    string  `json:"wikidata"`
}

type Book

// Book is a minimal record with an ID, a title, a publish date and a
// redirect target, all stored as strings.
type Book struct {
	ID          string `json:"id"`
	Title       string `json:"title"`
	PublishDate string `json:"publish_date"`
	RedirectTo  string `json:"redirect_to"`
}

type Concept

// Concept is a struct that represents a concept in OpenAlex.
// Nested JSON objects are modelled as anonymous structs; pointer fields
// (*string) can distinguish a JSON null from an empty string.
type Concept struct {
	ID        string `json:"id"`
	// Ancestors lists other concepts by display name, ID, level and Wikidata reference.
	Ancestors []struct {
		DisplayName string `json:"display_name"`
		ID          string `json:"id"`
		Level       int    `json:"level"`
		Wikidata    string `json:"wikidata"`
	} `json:"ancestors"`
	CitedByCount int `json:"cited_by_count"`
	// CountsByYear holds one entry of counts per year.
	CountsByYear []struct {
		CitedByCount int `json:"cited_by_count"`
		OaWorksCount int `json:"oa_works_count"`
		WorksCount   int `json:"works_count"`
		Year         int `json:"year"`
	} `json:"counts_by_year"`
	CreatedDate string `json:"created_date"`
	Description string `json:"description"`
	DisplayName string `json:"display_name"`
	// Ids groups the concept's identifiers. Mag is decoded as a
	// jsoniter.Number rather than a Go numeric type.
	Ids         struct {
		Mag       jsoniter.Number `json:"mag"`
		Openalex  string          `json:"openalex"`
		UmlsCui   []string        `json:"umls_cui,omitempty"`
		Wikidata  string          `json:"wikidata"`
		Wikipedia string          `json:"wikipedia"`
	} `json:"ids"`
	ImageThumbnailURL *string `json:"image_thumbnail_url"`
	ImageURL          *string `json:"image_url"`
	// International holds string-to-string maps for description and display
	// name (presumably keyed by language code — confirm).
	International     struct {
		Description map[string]string `json:"description"`
		DisplayName map[string]string `json:"display_name"`
	} `json:"international"`
	Level           int `json:"level"`
	RelatedConcepts []struct {
		DisplayName string  `json:"display_name"`
		ID          string  `json:"id"`
		Level       int     `json:"level"`
		Score       float64 `json:"score"`
		Wikidata    any     `json:"wikidata"` // TODO: replace any with struct
	} `json:"related_concepts"`
	// SummaryStats has the same fields as the summary_stats blocks of the
	// other entity types in this package.
	SummaryStats struct {
		CitedByCount2yr  int     `json:"2yr_cited_by_count"`
		HIndex2yr        int     `json:"2yr_h_index"`
		I10Index2yr      int     `json:"2yr_i10_index"`
		MeanCitedness2yr float64 `json:"2yr_mean_citedness"`
		WorksCount2yr    int     `json:"2yr_works_count"`
		CitedByCount     int     `json:"cited_by_count"`
		HIndex           int     `json:"h_index"`
		I10Index         int     `json:"i10_index"`
		OaPercent        float64 `json:"oa_percent"`
		WorksCount       int     `json:"works_count"`
	} `json:"summary_stats"`
	UpdatedDate string `json:"updated_date"`
	Wikidata    string `json:"wikidata"`
	WorksAPIURL string `json:"works_api_url"`
	WorksCount  int    `json:"works_count"`
}

Concept is a struct that represents a concept in OpenAlex

func (*Concept) GetID

func (c *Concept) GetID() string

GetID returns the ID of the concept

func (*Concept) GetType

func (c *Concept) GetType() string

GetType returns the entity type

type Entity

// Entity is the common interface of the OpenAlex entity structs. The Author,
// Concept, Funder, Institution, Publisher, Source and Work pointer types all
// declare both methods.
type Entity interface {
	// GetType returns the entity type.
	GetType() string
	// GetID returns the ID of the entity.
	GetID() string
}

type EntityFileSQL

// EntityFileSQL is the gorm model for one entity file. It belongs to a
// snapshot through SnapshotId and owns its lines through EntityLines.
type EntityFileSQL struct {
	gorm.Model
	SnapshotId     uint `gorm:"index"` // foreign key of snapshot
	EntityFileName string
	Identifier     string // ZipPath + "::" + EntityFolderName + "::" + DateFolderName + "::" + EntityZipName
	FullPath       string
	// Done is the completion flag (presumably set via
	// StateHandler.MarkEntityFileAsFinished — confirm).
	Done           bool
	Info           string
	// EntityLines are the child rows, linked through EntityLineSQL.EntityFileId.
	EntityLines    []EntityLineSQL `gorm:"foreignkey:EntityFileId"`
}

type EntityLineSQL

// EntityLineSQL is the gorm model for one line of an entity file. It belongs
// to an EntityFileSQL through EntityFileId.
type EntityLineSQL struct {
	gorm.Model
	EntityFileId uint `gorm:"index"` // Foreign key to EntityFile
	LineInfo     string
	Identifier   string // ZipPath + "::" + EntityFolderName + "::" + DateFolderName + "::" + EntityZipName + "::" + LineInfo
	Info         string
	FullPath     string
	// Done is the completion flag (presumably set via
	// StateHandler.MarkEntityLineAsFinished — confirm).
	Done         bool
}

type FileEntityType

// FileEntityType is a string naming the kind of entity a file contains.
// GetEntityType returns one of these values for a file path.
type FileEntityType string
// Known entity types.
//
// NOTE(review): the institutions and publishers values are singular
// ("institution", "publisher") while the others are plural. This may be
// deliberate (e.g. substring matching against paths) — confirm against
// GetEntityType before changing.
const (
	AuthorsFileEntityType      FileEntityType = "authors"
	ConceptsFileEntityType     FileEntityType = "concepts"
	FundersFileEntityType      FileEntityType = "funders"
	InstitutionsFileEntityType FileEntityType = "institution"
	PublishersFileEntityType   FileEntityType = "publisher"
	SourcesFileEntityType      FileEntityType = "sources"
	WorksFileEntityType        FileEntityType = "works"
)

func GetEntityType

func GetEntityType(filePath string) (result FileEntityType, err error)

type Funder

// Funder is a struct that represents the JSON response from the OpenAlex API
// for a funder. Nested JSON objects are modelled as anonymous structs.
type Funder struct {
	ID              string   `json:"id"`
	AlternateTitles []string `json:"alternate_titles"`
	CitedByCount    int      `json:"cited_by_count"`
	CountryCode     string   `json:"country_code"`
	// CountsByYear holds one entry of counts per year.
	CountsByYear    []struct {
		CitedByCount int `json:"cited_by_count"`
		OaWorksCount int `json:"oa_works_count"`
		WorksCount   int `json:"works_count"`
		Year         int `json:"year"`
	} `json:"counts_by_year"`
	CreatedDate string  `json:"created_date"`
	// Description and HomepageURL are pointers, so a JSON null stays nil.
	Description *string `json:"description"`
	DisplayName string  `json:"display_name"`
	GrantsCount int     `json:"grants_count"`
	HomepageURL *string `json:"homepage_url"`
	// Ids groups the funder's identifiers.
	Ids         struct {
		Crossref string `json:"crossref"`
		Doi      string `json:"doi"`
		Openalex string `json:"openalex"`
		Wikidata string `json:"wikidata,omitempty"`
		Ror      string `json:"ror"`
	} `json:"ids"`
	// NOTE(review): the image URLs are plain strings here but *string in
	// Concept, Institution and Publisher — confirm this is intentional.
	ImageThumbnailURL string `json:"image_thumbnail_url"`
	ImageURL          string `json:"image_url"`
	// Roles lists role entries, each with an ID, a role name and a works count.
	Roles             []struct {
		ID         string `json:"id"`
		Role       string `json:"role"`
		WorksCount int    `json:"works_count"`
	} `json:"roles"`
	SummaryStats struct {
		CitedByCount2yr  int     `json:"2yr_cited_by_count"`
		HIndex2yr        int     `json:"2yr_h_index"`
		I10Index2yr      int     `json:"2yr_i10_index"`
		MeanCitedness2yr float64 `json:"2yr_mean_citedness"`
		WorksCount2yr    int     `json:"2yr_works_count"`
		CitedByCount     int     `json:"cited_by_count"`
		HIndex           int     `json:"h_index"`
		I10Index         int     `json:"i10_index"`
		OaPercent        float64 `json:"oa_percent"`
		WorksCount       int     `json:"works_count"`
	} `json:"summary_stats"`
	UpdatedDate string `json:"updated_date"`
	WorksCount  int    `json:"works_count"`
}

Funder is a struct that represents the JSON response from the OpenAlex API.

func (*Funder) GetID

func (f *Funder) GetID() string

GetID returns the ID of the funder

func (*Funder) GetType

func (f *Funder) GetType() string

GetType returns the entity type

type Institution

// Institution is a struct that represents the JSON response from the OpenAlex
// API for an institution. Nested JSON objects are modelled as anonymous
// structs; pointer fields can distinguish a JSON null from a zero value.
type Institution struct {
	ID                     string `json:"id"`
	// AssociatedInstitutions lists other institutions together with the
	// relationship string that links them to this one.
	AssociatedInstitutions []struct {
		CountryCode  string `json:"country_code"`
		DisplayName  string `json:"display_name"`
		ID           string `json:"id"`
		Relationship string `json:"relationship"`
		Ror          string `json:"ror"`
		Type         string `json:"type"`
	} `json:"associated_institutions"`
	CitedByCount int    `json:"cited_by_count"`
	CountryCode  string `json:"country_code"`
	// CountsByYear holds one entry of counts per year.
	CountsByYear []struct {
		CitedByCount int `json:"cited_by_count"`
		WorksCount   int `json:"works_count"`
		Year         int `json:"year"`
	} `json:"counts_by_year"`
	CreatedDate             string   `json:"created_date"`
	DisplayName             string   `json:"display_name"`
	DisplayNameAcronyms     []string `json:"display_name_acronyms"`
	DisplayNameAlternatives []string `json:"display_name_alternatives"`
	// Geo holds location data; Latitude, Longitude and Region are pointers
	// and stay nil when the JSON value is null.
	Geo                     struct {
		City           string   `json:"city"`
		Country        string   `json:"country"`
		CountryCode    string   `json:"country_code"`
		GeonamesCityID string   `json:"geonames_city_id"`
		Latitude       *float64 `json:"latitude"`
		Longitude      *float64 `json:"longitude"`
		Region         *string  `json:"region"`
	} `json:"geo"`
	HomepageURL *string `json:"homepage_url"`
	// Ids groups the institution's identifiers. Mag is decoded as a
	// jsoniter.Number rather than a Go numeric type.
	Ids         struct {
		Grid      string          `json:"grid"`
		Mag       jsoniter.Number `json:"mag,omitempty"`
		Openalex  string          `json:"openalex"`
		Ror       string          `json:"ror"`
		Wikidata  string          `json:"wikidata,omitempty"`
		Wikipedia string          `json:"wikipedia,omitempty"`
	} `json:"ids"`
	ImageThumbnailURL *string `json:"image_thumbnail_url"`
	ImageURL          *string `json:"image_url"`
	// International holds a string-to-string map of display names
	// (presumably keyed by language code — confirm).
	International     struct {
		DisplayName map[string]string `json:"display_name"`
	} `json:"international"`
	Lineage      []string `json:"lineage"`
	// Repositories lists repository entries with their host organization data.
	Repositories []struct {
		DisplayName             string   `json:"display_name"`
		HostOrganization        string   `json:"host_organization"`
		HostOrganizationLineage []string `json:"host_organization_lineage"`
		HostOrganizationName    string   `json:"host_organization_name"`
		ID                      string   `json:"id"`
	} `json:"repositories"`
	// Roles lists role entries, each with an ID, a role name and a works count.
	Roles []struct {
		ID         string `json:"id"`
		Role       string `json:"role"`
		WorksCount int    `json:"works_count"`
	} `json:"roles"`
	Ror          string `json:"ror"`
	SummaryStats struct {
		CitedByCount2yr  int     `json:"2yr_cited_by_count"`
		HIndex2yr        int     `json:"2yr_h_index"`
		I10Index2yr      int     `json:"2yr_i10_index"`
		MeanCitedness2yr float64 `json:"2yr_mean_citedness"`
		WorksCount2yr    int     `json:"2yr_works_count"`
		CitedByCount     int     `json:"cited_by_count"`
		HIndex           int     `json:"h_index"`
		I10Index         int     `json:"i10_index"`
		OaPercent        float64 `json:"oa_percent"`
		WorksCount       int     `json:"works_count"`
	} `json:"summary_stats"`
	Type        string `json:"type"`
	UpdatedDate string `json:"updated_date"`
	WorksAPIURL string `json:"works_api_url"`
	WorksCount  int    `json:"works_count"`
	// XConcepts lists concept entries with an integer level and a float score.
	XConcepts   []struct {
		DisplayName string  `json:"display_name"`
		ID          string  `json:"id"`
		Level       int     `json:"level"`
		Score       float64 `json:"score"`
		Wikidata    string  `json:"wikidata"`
	} `json:"x_concepts"`
}

Institution is a struct that represents the JSON response from the OpenAlex API.

func (*Institution) GetID

func (i *Institution) GetID() string

GetID returns the ID of the institution

func (*Institution) GetType

func (i *Institution) GetType() string

GetType returns the entity type

type Manifest

// Manifest is a struct that represents the manifest file.
// It has one entry per URL plus a top-level Meta block with the same two
// fields as each entry's Meta.
type Manifest struct {
	// Entries lists the manifest's URLs with per-entry metadata.
	Entries []struct {
		URL  string `json:"url"`
		Meta struct {
			// NOTE(review): ContentLength is int here but int64 in the
			// top-level Meta; on 32-bit platforms int cannot hold values
			// above ~2 GiB — confirm whether this should be int64.
			ContentLength int `json:"content_length"`
			RecordCount   int `json:"record_count"`
		} `json:"meta"`
	} `json:"entries"`
	// Meta is the manifest-level metadata block.
	Meta struct {
		ContentLength int64 `json:"content_length"`
		RecordCount   int   `json:"record_count"`
	} `json:"meta"`
}

Manifest is a struct that represents the manifest file

func ReadManifestFromS3Url

func ReadManifestFromS3Url(s3Url ManifestUrl) (result *Manifest, err error)

ReadManifestFromS3Url reads the manifest file from S3 and returns a Manifest struct

func (*Manifest) CompareData

func (m *Manifest) CompareData(RootPath string, sh *StateHandler) (err error)

CompareData gets the amount of records in parsed data and compares it with the RecordCount in Manifest

func (*Manifest) Hash

func (m *Manifest) Hash() (result string, err error)

Hash returns the SHA256 hash of the manifest

type ManifestUrl

// ManifestUrl is a type that represents a manifest URL.
// ReadManifestFromS3Url takes a value of this type.
type ManifestUrl string

ManifestUrl is a type that represents a manifest URL

// Manifest URLs, one per entity type. All of them follow the pattern
// https://openalex.s3.amazonaws.com/data/<entity>/manifest.
const (
	ManifestUrlAuthors      ManifestUrl = "https://openalex.s3.amazonaws.com/data/authors/manifest"
	ManifestUrlConcepts     ManifestUrl = "https://openalex.s3.amazonaws.com/data/concepts/manifest"
	ManifestUrlFunders      ManifestUrl = "https://openalex.s3.amazonaws.com/data/funders/manifest"
	ManifestUrlInstitutions ManifestUrl = "https://openalex.s3.amazonaws.com/data/institutions/manifest"
	ManifestUrlPublishers   ManifestUrl = "https://openalex.s3.amazonaws.com/data/publishers/manifest"
	ManifestUrlSources      ManifestUrl = "https://openalex.s3.amazonaws.com/data/sources/manifest"
	ManifestUrlWorks        ManifestUrl = "https://openalex.s3.amazonaws.com/data/works/manifest"
)

type MergedID

// MergedID represents a row in the merged IDs file.
// All three columns are kept as raw strings.
type MergedID struct {
	// MergeDate is the merge date, unparsed.
	MergeDate   string
	// ID is the ID that was merged (presumably the retired one — confirm).
	ID          string
	// MergeIntoID is the ID that ID was merged into.
	MergeIntoID string
}

MergedID represents a row in the merged IDs file

type MergedIdRecordHandler

// MergedIdRecordHandler is a callback that handles one parsed MergedID record
// together with the entity type of the file it came from. It returns an error
// to report failure. PrintMergedIdRecordHandler has this signature.
type MergedIdRecordHandler func(fileEntityType FileEntityType, mergedID MergedID) error

MergedIdRecordHandler is a function that handles a parsed line of a file

type ParsedEntityLineHandler

// ParsedEntityLineHandler is a callback that handles one parsed line of a
// file. The entity is passed as any together with the entity type of the
// file, and the callback returns an error to report failure.
// PrintEntityHandler has this signature.
type ParsedEntityLineHandler func(fileEntityType FileEntityType, entity any) error

ParsedEntityLineHandler is a function that handles a parsed line of a file

type Publisher

// Publisher is a struct that represents a publisher in OpenAlex.
// Nested JSON objects are modelled as anonymous structs; pointer fields can
// distinguish a JSON null from an empty string.
type Publisher struct {
	ID              string   `json:"id"`
	AlternateTitles []string `json:"alternate_titles"`
	CitedByCount    int      `json:"cited_by_count"`
	// CountryCodes is a list, unlike the single CountryCode on Funder and
	// Institution.
	CountryCodes    []string `json:"country_codes"`
	// CountsByYear holds one entry of counts per year.
	CountsByYear    []struct {
		CitedByCount int `json:"cited_by_count"`
		OaWorksCount int `json:"oa_works_count"`
		WorksCount   int `json:"works_count"`
		Year         int `json:"year"`
	} `json:"counts_by_year"`
	CreatedDate    string  `json:"created_date"`
	DisplayName    string  `json:"display_name"`
	HierarchyLevel int     `json:"hierarchy_level"`
	HomepageURL    *string `json:"homepage_url"`
	// Ids groups the publisher's identifiers.
	Ids            struct {
		Openalex string `json:"openalex"`
		Wikidata string `json:"wikidata,omitempty"`
		Ror      string `json:"ror"`
	} `json:"ids"`
	ImageThumbnailURL *string  `json:"image_thumbnail_url"`
	ImageURL          *string  `json:"image_url"`
	Lineage           []string `json:"lineage"`
	// ParentPublisher is a pointer and stays nil when the JSON value is null.
	ParentPublisher   *string  `json:"parent_publisher"`
	// Roles lists role entries, each with an ID, a role name and a works count.
	Roles             []struct {
		ID         string `json:"id"`
		Role       string `json:"role"`
		WorksCount int    `json:"works_count"`
	} `json:"roles"`
	SourcesAPIURL string `json:"sources_api_url"`
	// SummaryStats additionally carries SourcesCount, which the summary_stats
	// blocks of the other entity types in this package do not have.
	SummaryStats  struct {
		CitedByCount2yr  int     `json:"2yr_cited_by_count"`
		HIndex2yr        int     `json:"2yr_h_index"`
		I10Index2yr      int     `json:"2yr_i10_index"`
		MeanCitedness2yr float64 `json:"2yr_mean_citedness"`
		WorksCount2yr    int     `json:"2yr_works_count"`
		CitedByCount     int     `json:"cited_by_count"`
		HIndex           int     `json:"h_index"`
		I10Index         int     `json:"i10_index"`
		OaPercent        float64 `json:"oa_percent"`
		SourcesCount     int     `json:"sources_count"`
		WorksCount       int     `json:"works_count"`
	} `json:"summary_stats"`
	UpdatedDate string `json:"updated_date"`
	WorksCount  int    `json:"works_count"`
	// XConcepts lists concept entries with an integer level and a float score.
	XConcepts   []struct {
		DisplayName string  `json:"display_name"`
		ID          string  `json:"id"`
		Level       int     `json:"level"`
		Score       float64 `json:"score"`
		Wikidata    string  `json:"wikidata"`
	} `json:"x_concepts"`
}

func (*Publisher) GetID

func (p *Publisher) GetID() string

GetID returns the ID of the publisher

func (*Publisher) GetType

func (p *Publisher) GetType() string

GetType returns the entity type

type SnapshotSQL

// SnapshotSQL is the gorm model for one snapshot. It owns its entity files
// through EntityFiles.
type SnapshotSQL struct {
	gorm.Model
	// SnapshotId carries a unique constraint.
	SnapshotId   string `gorm:"unique"`
	ZipPath      string // Identifier
	DatabasePath string
	// Done is the completion flag (presumably set via
	// StateHandler.MarkSnapshotAsFinished — confirm).
	Done         bool
	Info         string
	// EntityFiles are the child rows, linked through EntityFileSQL.SnapshotId.
	EntityFiles  []EntityFileSQL `gorm:"foreignkey:SnapshotId"`
}

e.g. C:/openalex/data/

type Source

// Source is a struct that represents a source in OpenAlex.
// Nested JSON objects are modelled as anonymous structs; pointer fields can
// distinguish a JSON null from a zero value.
type Source struct {
	ID               string   `json:"id"`
	AbbreviatedTitle *string  `json:"abbreviated_title"`
	AlternateTitles  []string `json:"alternate_titles"`
	// ApcPrices lists prices as integer amounts with a currency string.
	ApcPrices        []struct {
		Currency string `json:"currency"`
		Price    int    `json:"price"`
	} `json:"apc_prices"`
	// ApcUsd is a pointer and stays nil when the JSON value is null.
	ApcUsd       *int    `json:"apc_usd"`
	CitedByCount int     `json:"cited_by_count"`
	CountryCode  *string `json:"country_code"`
	// CountsByYear holds one entry of counts per year.
	CountsByYear []struct {
		CitedByCount int `json:"cited_by_count"`
		WorksCount   int `json:"works_count"`
		Year         int `json:"year"`
	} `json:"counts_by_year"`
	CreatedDate             string   `json:"created_date"`
	DisplayName             string   `json:"display_name"`
	HomepageURL             *string  `json:"homepage_url"`
	HostOrganization        *string  `json:"host_organization"`
	HostOrganizationLineage []string `json:"host_organization_lineage"`
	HostOrganizationName    *string  `json:"host_organization_name"`
	// Ids groups the source's identifiers. Mag is decoded as a
	// jsoniter.Number rather than a Go numeric type.
	Ids                     struct {
		Fatcat   string          `json:"fatcat,omitempty"`
		Issn     []string        `json:"issn,omitempty"`
		IssnL    string          `json:"issn_l,omitempty"`
		Mag      jsoniter.Number `json:"mag,omitempty"`
		Openalex string          `json:"openalex"`
		Wikidata string          `json:"wikidata,omitempty"`
	} `json:"ids"`
	IsInDoaj  bool     `json:"is_in_doaj"`
	IsOa      bool     `json:"is_oa"`
	// Issn and IssnL also appear inside Ids; here IssnL is a pointer.
	Issn      []string `json:"issn"`
	IssnL     *string  `json:"issn_l"`
	// Societies lists entries with a URL and an organization, both optional.
	Societies []struct {
		Url          *string `json:"url"`
		Organization *string `json:"organization"`
	} `json:"societies"`
	SummaryStats struct {
		CitedByCount2yr  int     `json:"2yr_cited_by_count"`
		HIndex2yr        int     `json:"2yr_h_index"`
		I10Index2yr      int     `json:"2yr_i10_index"`
		MeanCitedness2yr float64 `json:"2yr_mean_citedness"`
		WorksCount2yr    int     `json:"2yr_works_count"`
		CitedByCount     int     `json:"cited_by_count"`
		HIndex           int     `json:"h_index"`
		I10Index         int     `json:"i10_index"`
		OaPercent        float64 `json:"oa_percent"`
		WorksCount       int     `json:"works_count"`
	} `json:"summary_stats"`
	Type        string `json:"type"`
	UpdatedDate string `json:"updated_date"`
	WorksAPIURL string `json:"works_api_url"`
	WorksCount  int    `json:"works_count"`
	// XConcepts lists concept entries with an integer level and a float score.
	XConcepts   []struct {
		DisplayName string  `json:"display_name"`
		ID          string  `json:"id"`
		Level       int     `json:"level"`
		Score       float64 `json:"score"`
		Wikidata    string  `json:"wikidata"`
	} `json:"x_concepts"`
}

func (*Source) GetID

func (s *Source) GetID() string

GetID returns the ID of the source

func (*Source) GetType

func (s *Source) GetType() string

GetType returns the entity type

type StateHandler

// StateHandler contains the config for the state handler.
// NewStateHandler takes the database name, database directory and snapshot
// zip path that correspond to the first three fields.
type StateHandler struct {
	// Fields the caller is expected to initialize.
	DatabaseName    string // e.g. log.db, for the initializer
	DatabaseDir     string // path of the .db, e.g. C:\docdb\ or .\ for relative path
	SnapshotZipPath string // full path to the snapshot zip
	// SafeDeleteOnly is a flag (presumably set via SetSafeDelete — confirm).
	SafeDeleteOnly  bool

	DatabasePath string // Database Dir + Database Name
	// contains filtered or unexported fields
}

StateHandler contains the config for the state handler

func NewStateHandler

func NewStateHandler(databaseName string, databaseDir string, snapshotZipPath string) *StateHandler

NewStateHandler creates a new state handler

func (*StateHandler) Initialize

func (sh *StateHandler) Initialize()

Initialize loads the last known state and creates the DB if there is none. (Earlier wording said it returns false if the processing is already finished and true if there is some processing left to be done, but the method as declared returns no value.)

func (*StateHandler) IsSnapshotFinished

func (sh *StateHandler) IsSnapshotFinished() bool

IsSnapshotFinished returns false if directory.done = false or no entry exists

func (*StateHandler) MarkEntityFileAsFinished

func (sh *StateHandler) MarkEntityFileAsFinished()

func (*StateHandler) MarkEntityLineAsFinished

func (sh *StateHandler) MarkEntityLineAsFinished()

func (*StateHandler) MarkSnapshotAsFinished

func (sh *StateHandler) MarkSnapshotAsFinished()

func (*StateHandler) MarkSnapshotAsUpdated

func (sh *StateHandler) MarkSnapshotAsUpdated()

func (*StateHandler) RegisterOrSkipEntityFile

func (sh *StateHandler) RegisterOrSkipEntityFile(filePath string) (bool, error)

RegisterOrSkipEntityFile returns true if the entity file is already processed. If the EntityFile entry does not exist, it creates a new one (using the current processDir as foreign key); if the entry exists but is not done, it loads the existing bulk file information.

func (*StateHandler) RegisterOrSkipEntityLine

func (sh *StateHandler) RegisterOrSkipEntityLine(line_info string) (bool, error)

func (*StateHandler) SetSafeDelete

func (sh *StateHandler) SetSafeDelete(status bool)

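SetSafeDelete sets the safe-delete status to the given value (presumably the SafeDeleteOnly field; the previous text, "no if directory.done = false or no entry exists", appears to have been copied from IsSnapshotFinished)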

type Work

// Work is the struct for a work in the OpenAlex database.
//
// Nested JSON objects are modelled as anonymous structs; pointer fields can
// distinguish a JSON null from a zero value. Every field carries an explicit
// snake_case JSON tag, including Grants, which previously had none and was
// therefore marshalled under the key "Grants" instead of "grants".
type Work struct {
	ID string `json:"id"`
	// Abstract is the plain-text abstract (presumably filled in by
	// GenerateAbstractFromInvertedIndex — confirm).
	Abstract string `json:"abstract"`
	// AbstractInvertedIndex maps a string to a list of integers (presumably a
	// word to its positions in the abstract — confirm). It is omitted from
	// marshalled output when empty.
	AbstractInvertedIndex map[string][]int `json:"abstract_inverted_index,omitempty"`
	// Authorships lists one entry per author with position, institutions and
	// raw strings.
	Authorships []struct {
		Author struct {
			DisplayName string  `json:"display_name"`
			ID          string  `json:"id"`
			Orcid       *string `json:"orcid"`
		} `json:"author"`
		AuthorPosition string `json:"author_position"`
		Institutions   []struct {
			CountryCode *string `json:"country_code"`
			DisplayName string  `json:"display_name"`
			ID          *string `json:"id"`
			Ror         *string `json:"ror"`
			Type        *string `json:"type"`
		} `json:"institutions"`
		RawAffiliationString *string  `json:"raw_affiliation_string"`
		RawAuthorName        *string  `json:"raw_author_name"`
		IsCorresponding      *bool    `json:"is_corresponding"`
		Countries            []string `json:"countries"`
	} `json:"authorships"`
	// ApcList holds an integer value, its currency and provenance, and the
	// value in USD.
	ApcList struct {
		Value      int     `json:"value"`
		Currency   *string `json:"currency"`
		Provenance *string `json:"provenance"`
		ValueUsd   int     `json:"value_usd"`
	} `json:"apc_list"`
	// BestOALocation has the same fields as the entries of Locations and as
	// PrimaryLocation.
	BestOALocation struct {
		IsOA           *bool   `json:"is_oa"`
		LandingPageUrl *string `json:"landing_page_url"`
		PdfUrl         *string `json:"pdf_url"`
		License        *string `json:"license"`
		Version        *string `json:"version"`
		Source         struct {
			Id               *string  `json:"id"`
			DisplayName      *string  `json:"display_name"`
			IssnL            *string  `json:"issn_l"`
			Issn             []string `json:"issn"`
			HostOrganization *string  `json:"host_organization"`
			Type             *string  `json:"type"`
		} `json:"source"`
	} `json:"best_oa_location"`
	// Biblio holds volume, issue and page range, all as optional strings.
	Biblio struct {
		FirstPage *string `json:"first_page"`
		Issue     *string `json:"issue"`
		LastPage  *string `json:"last_page"`
		Volume    *string `json:"volume"`
	} `json:"biblio"`
	CitedByAPIURL string `json:"cited_by_api_url"`
	CitedByCount  int    `json:"cited_by_count"`
	// Concepts lists concept entries with an integer level and a float score.
	Concepts []struct {
		DisplayName string  `json:"display_name"`
		ID          string  `json:"id"`
		Level       int     `json:"level"`
		Score       float64 `json:"score"`
		Wikidata    string  `json:"wikidata"`
	} `json:"concepts"`
	CorrespondingAuthorIds      []string `json:"corresponding_author_ids"`
	CorrespondingInstitutionIds []string `json:"corresponding_institution_ids"`
	CountriesDistinctCount      int      `json:"countries_distinct_count"`
	// CountsByYear holds one cited-by count per year.
	CountsByYear []struct {
		CitedByCount int `json:"cited_by_count"`
		Year         int `json:"year"`
	} `json:"counts_by_year"`
	CreatedDate string `json:"created_date"`
	DisplayName string `json:"display_name"`
	Doi         string `json:"doi"`
	// Grants lists funder and award entries. The explicit tag makes it
	// marshal as "grants", matching the tag style of every other field.
	Grants []struct {
		Funder            string `json:"funder"`
		FunderDisplayName string `json:"funder_display_name"`
		AwardId           string `json:"award_id"`
	} `json:"grants"`
	HasFulltext               *bool  `json:"has_fulltext"`
	InstitutionsDistinctCount int    `json:"institutions_distinct_count"`
	Language                  string `json:"language"`
	// Locations lists location entries.
	Locations []struct {
		IsOA           *bool   `json:"is_oa"`
		LandingPageUrl *string `json:"landing_page_url"`
		PdfUrl         *string `json:"pdf_url"`
		Source         struct {
			Id               *string  `json:"id"`
			DisplayName      *string  `json:"display_name"`
			IssnL            *string  `json:"issn_l"`
			Issn             []string `json:"issn"`
			HostOrganization *string  `json:"host_organization"`
			Type             *string  `json:"type"`
		} `json:"source"`
		License *string `json:"license"`
		Version *string `json:"version"`
	} `json:"locations"`
	// PrimaryLocation is a single location entry.
	PrimaryLocation struct {
		IsOA           *bool   `json:"is_oa"`
		LandingPageUrl *string `json:"landing_page_url"`
		PdfUrl         *string `json:"pdf_url"`
		Source         struct {
			Id               *string  `json:"id"`
			DisplayName      *string  `json:"display_name"`
			IssnL            *string  `json:"issn_l"`
			Issn             []string `json:"issn"`
			HostOrganization *string  `json:"host_organization"`
			Type             *string  `json:"type"`
		} `json:"source"`
		License *string `json:"license"`
		Version *string `json:"version"`
	} `json:"primary_location"`
	LocationCount int `json:"location_count"`
	// Ids groups the work's identifiers. Mag is decoded as a jsoniter.Number
	// rather than a Go numeric type.
	Ids struct {
		Doi      string          `json:"doi"`
		Mag      jsoniter.Number `json:"mag"`
		Openalex string          `json:"openalex"`
		Pmid     string          `json:"pmid,omitempty"`
	} `json:"ids"`
	IsParatext  bool `json:"is_paratext"`
	IsRetracted bool `json:"is_retracted"`
	// Mesh lists descriptor/qualifier entries; the qualifier fields are
	// optional.
	Mesh []struct {
		DescriptorName string  `json:"descriptor_name"`
		DescriptorUi   string  `json:"descriptor_ui"`
		IsMajorTopic   bool    `json:"is_major_topic"`
		QualifierName  *string `json:"qualifier_name"`
		QualifierUi    *string `json:"qualifier_ui"`
	} `json:"mesh"`
	// OpenAccess holds the open-access flags, status and URL.
	OpenAccess struct {
		IsOa                     bool    `json:"is_oa"`
		OaStatus                 string  `json:"oa_status"`
		OaURL                    *string `json:"oa_url"`
		AnyRepositoryHasFulltext bool    `json:"any_repository_has_fulltext"`
	} `json:"open_access"`
	PublicationDate string   `json:"publication_date"`
	PublicationYear int      `json:"publication_year"`
	ReferencedWorks []string `json:"referenced_works"`
	RelatedWorks    []string `json:"related_works"`
	Title           string   `json:"title"`
	Type            string   `json:"type"`
	UpdatedDate     string   `json:"updated_date"`
	TypeCrossref    string   `json:"type_crossref"`
}

Work is the struct for a work in the OpenAlex database

func (*Work) GenerateAbstractFromInvertedIndex

func (w *Work) GenerateAbstractFromInvertedIndex() *Work

GenerateAbstractFromInvertedIndex sets the abstract

func (*Work) GetID

func (w *Work) GetID() string

GetID returns the ID of the work

func (*Work) GetType

func (w *Work) GetType() string

GetType returns the entity type

func (*Work) SetAbstractInvertedIndexToNil

func (w *Work) SetAbstractInvertedIndexToNil() *Work

SetAbstractInvertedIndexToNil sets the abstract inverted index to nil

func (*Work) ToAbstract

func (w *Work) ToAbstract() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL