dbcs

package
v0.19.8 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 12, 2021 License: GPL-3.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// Tuning constants for comment parsing and comment-timestamp inference.
	//
	// The plain-integer *_NANO_TS values are nanosecond counts; their
	// trailing comments give the equivalent in milliseconds.
	// The values built from types.TS_TO_NANO_TS are a count (60, 2*60,
	// 86400, 2*86400) scaled by that factor. TS_TO_NANO_TS is declared in
	// package types and is not visible here; presumably it converts seconds
	// to nanoseconds, making these 1 minute / 2 minutes / 1 day / 2 days
	// (TODO confirm).
	N_FIRST_COMMENTS                = 10
	COMMENT_STEP_DURATION           = 1 * time.Millisecond
	REPLY_STEP_NANO_TS              = 100000  // 0.1 millisecond
	DELETE_STEP_NANO_TS             = 10000   // 0.01 millisecond
	COMMENT_STEP_NANO_TS            = 1000000 // 1 millisecond
	COMMENT_EXCEED_NANO_TS          = 1000    // 0.001 millisecond
	COMMENT_STEP_DIFF_NANO_TS       = 2 * 60 * types.TS_TO_NANO_TS
	COMMENT_STEP_DIFF2_NANO_TS      = 2 * 86400 * types.TS_TO_NANO_TS
	COMMENT_BACKWARD_OFFSET_NANO_TS = 900000000 // 900 millisecond
	COMMENT_DIFF_ALIGN_END_NANO_TS  = 60 * types.TS_TO_NANO_TS
	COMMENT_DIFF2_ALIGN_END_NANO_TS = 86400 * types.TS_TO_NANO_TS

	// NOTE(review): presumably the default/expected number of bytes per
	// line when sizing buffers; the usage is not visible here — confirm.
	DEFAULT_LINE_BYTES = 200

	// NOTE(review): presumably the byte lengths of the date field in a
	// comment line: 5 for the old "MM/DD" form and 11 for "MM/DD hh:mm"
	// — confirm against the parser.
	LEN_OLD_RECOMMEND_DATE = 5
	LEN_RECOMMEND_DATE     = 11

	// 365 * 86400 scaled by types.TS_TO_NANO_TS; presumably one
	// (non-leap) year expressed in nanoseconds (TODO confirm).
	ONE_YEAR_OFFSET_NANO_TS = 365 * 86400 * types.TS_TO_NANO_TS
)

Variables

View Source
var (
	// Byte patterns used to recognize comment lines in raw DBCS article
	// content. Bytes >= 0x80 below are double-byte (DBCS) characters;
	// NOTE(review): the encoding is presumably Big5 — confirm.

	// MATCH_COMMENT_RECOMMEND_BYTES is, in order: ESC[1;37m, the two DBCS
	// bytes 0xb1 0xc0, a space, and ESC[33m.
	// NOTE(review): 0xb1 0xc0 is presumably the "recommend" (push) glyph
	// — confirm.
	MATCH_COMMENT_RECOMMEND_BYTES = []byte{

		0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x37, 0x6d,
		0xb1, 0xc0, 0x20, 0x1b, 0x5b, 0x33, 0x33, 0x6d,
	}

	// MATCH_COMMENT_BOO_BYTES is, in order: ESC[1;31m, the two DBCS bytes
	// 0xbc 0x4e, a space, and ESC[33m.
	// NOTE(review): 0xbc 0x4e is presumably the "boo" glyph — confirm.
	MATCH_COMMENT_BOO_BYTES = []byte{

		0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x31, 0x6d,
		0xbc, 0x4e, 0x20, 0x1b, 0x5b, 0x33, 0x33, 0x6d,
	}

	// MATCH_COMMENT_ARROW_BYTES is, in order: ESC[1;31m, the two DBCS
	// bytes 0xa1 0xf7, a space, and ESC[33m.
	// NOTE(review): 0xa1 0xf7 is presumably the arrow glyph — confirm.
	MATCH_COMMENT_ARROW_BYTES = []byte{

		0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x31, 0x6d,
		0xa1, 0xf7, 0x20, 0x1b, 0x5b, 0x33, 0x33, 0x6d,
	}

	// MATCH_COMMENT_EDIT_BYTES is the DBCS prefix of an edit-record line.
	// Sample line rendered as UTF-8 ("編輯" = "edit", "臺灣" = "Taiwan"):
	//   ※ 編輯: abcd (1.2.3.4 臺灣), 03/21/2021 03:04:47
	// A line of the same form as raw bytes:
	//   \xa1\xb0 \xbds\xbf\xe8: abcd (1.2.3.4 \xbbO\xc6W), 03/18/2021 12:07:22
	MATCH_COMMENT_EDIT_BYTES = []byte("\xa1\xb0 \xbds\xbf\xe8: ")

	// MATCH_COMMENT_EDIT_FROM_BYTES is four DBCS bytes followed by ": ".
	// NOTE(review): presumably "來自: " ("from: ") as it appears in an
	// edit-record line — confirm.
	MATCH_COMMENT_EDIT_FROM_BYTES = []byte("\xa8\xd3\xa6\xdb: ")

	// Forward (cross-post) record lines. Samples rendered as UTF-8, each
	// followed by the raw bytes of a line of the same form:
	//   ※ abcde:轉錄至看板 SYSOP            ("forwarded to board SYSOP")
	//   \xa1\xb0 \x1b[1;32mabcd\x1b[0;32m:\xc2\xe0\xbf\xfd\xa6\xdc\xac\xdd\xaaO Mavericks\x1b[m                               03/18 12:07
	//   ※ jasome:轉錄至某隱形看板            ("forwarded to some hidden board")
	//   \xa1\xb0 \x1b[1;32mjasome\x1b[0;32m:\xc2\xe0\xbf\xfd\xa6\xdc\xacY\xc1\xf4\xa7\xce\xac\xdd\xaaO\x1b[m                                         01/29 02:39
	//
	// MATCH_COMMENT_FORWARD_BYTES is the part that follows the user id:
	// ESC[0;32m, ':' and the DBCS bytes of "轉錄至" ("forwarded to").
	// MATCH_COMMENT_FORWARD_BOARD_BYTES is the DBCS bytes of "看板"
	// ("board") plus a trailing space, as in the first sample.
	MATCH_COMMENT_FORWARD_BYTES       = []byte("\x1b[0;32m:\xc2\xe0\xbf\xfd\xa6\xdc") //\x1b[0;32m:\xc2
	MATCH_COMMENT_FORWARD_BOARD_BYTES = []byte("\xac\xdd\xaaO ")

	// MATCH_COMMENT_FORWARD_PREFIX is the start of a forward record line
	// as shown in the raw samples: "※", a space, then ESC[1;32m.
	MATCH_COMMENT_FORWARD_PREFIX = []byte("\xa1\xb0 \x1b[1;32m")

	// Deleted-comment record lines. Sample rendered as UTF-8
	// ("(teemocogs deleted teemocogs's comment: typo)"), then as raw bytes:
	//   (teemocogs 刪除 teemocogs 的推文: 誤植)
	//   \x1b[1;30m(teemocogs \xa7R\xb0\xa3 teemocogs \xaa\xba\xb1\xc0\xa4\xe5: \xbb~\xb4\xd3)\x1b[m
	//
	// PREFIX is ESC[1;30m followed by '('; INFIX0 is " 刪除 " (" deleted ");
	// INFIX1 is " 的推文: " ("'s comment: "); POSTFIX is ')' followed by ESC[m.
	MATCH_COMMENT_DELETED_PREFIX  = []byte("\x1b[1;30m(")
	MATCH_COMMENT_DELETED_INFIX0  = []byte(" \xa7R\xb0\xa3 ")
	MATCH_COMMENT_DELETED_INFIX1  = []byte(" \xaa\xba\xb1\xc0\xa4\xe5: ")
	MATCH_COMMENT_DELETED_POSTFIX = []byte(")\x1b[m")

	// MATCH_COMMENT_GREEN_PREFIX is the DBCS "※" mark followed by a space.
	MATCH_COMMENT_GREEN_PREFIX = []byte("\xa1\xb0 ") //※
)
View Source
var (
	// Byte patterns used to recognize the signature/footer lines of an
	// article in raw DBCS content. Each pattern ends with ": " (0x3a 0x20).

	// MATCH_SIGNATURE_FROM is "), " followed by the same four DBCS bytes
	// used in MATCH_COMMENT_EDIT_FROM_BYTES and then ": ".
	// NOTE(review): presumably "), 來自: " ("), from: ") — confirm.
	MATCH_SIGNATURE_FROM = []byte{
		0x29, 0x2c, 0x20, 0xa8, 0xd3, 0xa6, 0xdb, 0x3a, 0x20,
	}

	// MATCH_SIGNATURE_FROM_OLD is the two DBCS bytes 0xa1 0xbb, a space,
	// and the ASCII text "From: ".
	// NOTE(review): presumably the older footer format — confirm.
	MATCH_SIGNATURE_FROM_OLD = []byte{
		0xa1, 0xbb, 0x20, 0x46, 0x72, 0x6f, 0x6d, 0x3a, 0x20,
	}

	// MATCH_SIGNATURE_FORWARD is the "※" mark (0xa1 0xb0), a space, six
	// DBCS bytes and ": ". The first four of those bytes (0xc2 0xe0 0xbf
	// 0xfd) are the same as the start of "轉錄至" in
	// MATCH_COMMENT_FORWARD_BYTES.
	// NOTE(review): presumably "※ 轉錄者: " ("※ forwarded by: ") — confirm.
	MATCH_SIGNATURE_FORWARD = []byte{
		0xa1, 0xb0, 0x20, 0xc2, 0xe0, 0xbf, 0xfd, 0xaa, 0xcc, 0x3a, 0x20,
	}

	// MATCH_SIGNATURE_URL is the "※" mark (0xa1 0xb0), a space, eight
	// DBCS bytes and ": ".
	// NOTE(review): presumably "※ 文章網址: " ("※ article URL: ") — confirm.
	MATCH_SIGNATURE_URL = []byte{
		0xa1, 0xb0, 0x20, 0xa4, 0xe5, 0xb3, 0xb9, 0xba, 0xf4, 0xa7, 0x7d, 0x3a, 0x20,
	}
)
View Source
// MATCH_SIGNATURE_INIT is a newline (0x0a), the "※" mark (0xa1 0xb0), a
// space, six DBCS bytes and ": " (0x3a 0x20).
// NOTE(review): presumably "\n※ 發信站: " ("※ posted from: "), the line
// that starts the signature section — confirm.
var MATCH_SIGNATURE_INIT = []byte{
	0x0a, 0xa1, 0xb0, 0x20, 0xb5,
	0x6f, 0xab, 0x48, 0xaf, 0xb8, 0x3a, 0x20,
}

Functions

func InferTimestamp added in v0.15.0

func InferTimestamp(edBlocks []*EDBlock, isForwardOnly bool, isLastAlignEndNanoTS bool, articleCreateTime types.NanoTS) (nBlock int)

func IntegrateComments added in v0.15.0

func IntegrateComments(boardID bbs.BBoardID, articleID bbs.ArticleID, comments []*schema.Comment, articleCreateTime types.NanoTS, articleMTime types.NanoTS, isForwardOnly bool, isLastAlignEndNanoTS bool) (newComments []*schema.Comment, toDeleteComments []*schema.CommentMD5, err error)

func MatchComment added in v0.15.0

func MatchComment(content []byte) int

MatchComment

TODO: record the index of each condition, and rematch only the condition with the smallest index.

func MatchCommentType added in v0.15.0

func MatchCommentType(commentDBCS []byte) (theType ptttype.CommentType, nextCommentDBCS []byte)

func ParseComments

func ParseComments(
	ownerID bbs.UUserID,
	commentsDBCS []byte,
	allCommentsDBCS []byte,
) (comments []*schema.Comment)

ParseComments

有可能 reply-edit-info (編輯) 不在 commentsDBCS 裡 但是會在 allCommentsDBCS 裡 (firstComments) 只考慮: 1. appropriately split comments. 2. 對於每個 comment 裡的 DBCS Parse 成 Utf8. 3. type / IP / Host / MD5 / TheDate

不考慮: 1. boardID / articleID / commentID. 2. createTime / firstCreateTime / InferredCreateTime / AddCreateTime (除了編輯以外)

  1. 根據 '\n' 估計 nComments
  2. 找出 pre-comment reply.
  3. 對於每個 comment-leading newline for-loop: 3.0. parse comment 3.1. 找下一個 comment 3.1.1. 如果沒有更多 comment: 假設剩下 text 的都是 reply. 3.2. 假設下一個 comment 之前的 text 都是 reply.
  4. (outside for-loop): 處理最後一個沒有 '\n' 的 comment.

func ParseContent

func ParseContent(contentBytes []byte, origContentMD5 string) (content [][]*types.Rune, contentMD5 string, ip string, host string, bbs string, signatureMD5 string, signatureDBCS []byte, commentsDBCS []byte)

ParseContent

Assume: 1. the content consists of chars >= 32 plus '\x1b', '\r', '\n'. 2. the timestamps of the 1st-comments (around 10 comments, including the last-same-min comments) are within 1 year of the createTime. 3. the timestamps of the rest of the comments can be reverse-inferred from mtime; they are compared as they are stored, i.e. as nano-ts. 4. there are no more than 60000 comments (60 x 1000) in 1 minute.

func ParseFirstComments

func ParseFirstComments(
	bboardID bbs.BBoardID,
	articleID bbs.ArticleID,
	ownerID bbs.UUserID,
	articleCreateTime types.NanoTS,
	articleMTime types.NanoTS,
	commentsDBCS []byte,
	origFirstCommentsMD5 string) (

	firstComments []*schema.Comment,
	firstCommentsMD5 string,
	theRestCommentsDBCS []byte,
	err error)

ParseFirstComments

Compare with origFirstCommentsMD5: if it already exists, return nil, which requires the caller to get firstComments and lastTime from the db.

func Utf8ToDBCS added in v0.15.0

func Utf8ToDBCS(utf8 [][]*types.Rune) (dbcs [][]byte)

Types

type DBCSState

// DBCSState is an integer state code; the package also defines a String
// method on it.
// NOTE(review): presumably the scanner state while walking DBCS bytes:
// NONE = not inside a double-byte character, LEAD = at the lead byte,
// TAIL = at the tail byte, COLOR = inside an ANSI color escape — confirm
// against the code that uses these states.
type DBCSState int
const (
	DBCS_STATE_NONE  DBCSState = 0
	DBCS_STATE_LEAD  DBCSState = 1
	DBCS_STATE_TAIL  DBCSState = 2
	DBCS_STATE_COLOR DBCSState = 3
)

func (DBCSState) String added in v0.15.0

func (d DBCSState) String() string

type EDBlock added in v0.15.0

// EDBlock groups the edit-distance infos of new and original comments
// together with the time range they belong to. Per the InferTimestamp
// documentation, OrigComments are sorted between StartNanoTS and
// EndNanoTS, while NewComments may carry out-of-range times.
// NOTE(review): whether the bounds are inclusive is not visible here;
// see EDInfoMeta for the convention used when a block is built — confirm.
type EDBlock struct {
	NewComments  []*EDInfo
	OrigComments []*EDInfo
	StartNanoTS  types.NanoTS
	EndNanoTS    types.NanoTS
}

func CalcEDBlocks added in v0.15.0

func CalcEDBlocks(newComments []*schema.Comment, origComments []*schema.CommentMD5, articleCreateTime types.NanoTS, articleMTime types.NanoTS) (edBlocks []*EDBlock, err error)

CalcEDBlocks

Must already guarantee that: 1. articleCreateTime < all origComments.SortTime 2. articleMTime >= all origComments.SortTime 3. origComments are sorted by SortTime 4. newComments are sorted by the line-idx.

func (*EDBlock) AlignEndNanoTS added in v0.15.0

func (ed *EDBlock) AlignEndNanoTS()

func (*EDBlock) BackwardInferTS added in v0.15.0

func (ed *EDBlock) BackwardInferTS(nextIdx int, isAlignEndNanoTS bool)

BackwardInferTS

func (*EDBlock) ForwardInferTS added in v0.15.0

func (ed *EDBlock) ForwardInferTS(startNanoTS types.NanoTS) (nextIdx int)

ForwardInferTS

func (*EDBlock) InferTimestamp added in v0.15.0

func (ed *EDBlock) InferTimestamp(articleCreateTime types.NanoTS, isForwardOnly bool, isLastAlignEndNanoTS bool)

InferTimestamp

  1. OrigComments are sorted between ed.StartNanoTS and ed.EndNanoTS
  2. It's possible that the newComments are with out-of-range time.
  3. It's possible that multiple comments share the same date-str, but we still need some way to make the timestamp unique.
  4. The time from OrigComments should not be moved.

The possible reasons that new-comments appear in between original-comments:

  1. XXX delete (try to map the corresponding deleted messages). We don't do this, to simplify the mapping sequence.
  2. reply (previous-appearing-message (currentNanoTS in same or newComments) + REPLY_STEP_NANO_TS)
  3. new messages. (sort-time should be after the deleted-messages)
  4. others (the owners accidentally edited something, sort-time should be after the deleted-messages)

type EDInfo added in v0.15.0

// EDInfo is one edit-distance entry: an operation (Op) together with the
// new and/or original comment it refers to and the time used for sorting.
// NOTE(review): which of NewComment / OrigComment is set for each Op is
// presumably determined by the NewEDInfoFrom{Add,Delete,Same}Comment
// constructors — confirm there.
type EDInfo struct {
	Op          EDOp
	NewComment  *schema.Comment // SAME/DELETE: origComments, ADD: newComments
	OrigComment *schema.CommentMD5
	SortTime    types.NanoTS
}

func NewEDInfoFromAddComment added in v0.15.0

func NewEDInfoFromAddComment(comment *schema.Comment) (edInfo *EDInfo)

func NewEDInfoFromDeleteComment added in v0.15.0

func NewEDInfoFromDeleteComment(commentMD5 *schema.CommentMD5) (edInfo *EDInfo)

func NewEDInfoFromSameComment added in v0.15.0

func NewEDInfoFromSameComment(newComment *schema.Comment, origCommentMD5 *schema.CommentMD5) (edInfo *EDInfo)

type EDInfoMeta added in v0.15.0

// EDInfoMeta describes one block within a list of EDInfo: the half-open
// index range [StartIdx, EndIdx) and the time bounds of that block.
// ToEDBlock turns it into the corresponding EDBlock.
type EDInfoMeta struct {

	// StartNanoTS (not included)
	StartNanoTS types.NanoTS

	// EndNanoTS (not included except the last ed-info)
	EndNanoTS types.NanoTS

	// StartIdx (included)
	StartIdx int

	// EndIdx (not included)
	EndIdx int
}

func (*EDInfoMeta) ToEDBlock added in v0.15.0

func (meta *EDInfoMeta) ToEDBlock(edInfos []*EDInfo) (edBlock *EDBlock)

ToEDBlock

Given the list of edInfos, where NewComments and OrigComments are already separately sorted, construct the corresponding ed-block.

type EDOp added in v0.15.0

// EDOp is the operation code stored in EDInfo.Op.
// NOTE(review): presumably the edit-distance operation relating a new
// comment list to the original one: SAME = present in both, DELETE =
// only in the original, ADD = only in the new list; UNKNOWN is the zero
// value — confirm against CalcEDBlocks.
type EDOp uint8
const (
	ED_OP_UNKNOWN EDOp = 0
	ED_OP_SAME    EDOp = 1
	ED_OP_DELETE  EDOp = 2
	ED_OP_ADD     EDOp = 3
)

type INFER_TIMESTAMP_TYPE added in v0.15.0

// INFER_TIMESTAMP_TYPE is an 8-bit code with three values; INVALID is the
// zero value.
// NOTE(review): presumably the precision available when inferring a
// comment timestamp: YMDHM = year/month/day/hour/minute, YMD =
// year/month/day only — confirm against the inference code.
type INFER_TIMESTAMP_TYPE uint8
const (
	INFER_TIMESTAMP_INVALID INFER_TIMESTAMP_TYPE = 0
	INFER_TIMESTAMP_YMDHM   INFER_TIMESTAMP_TYPE = 1
	INFER_TIMESTAMP_YMD     INFER_TIMESTAMP_TYPE = 2
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL