Documentation ¶
Index ¶
- Constants
- Variables
- func InferTimestamp(edBlocks []*EDBlock, isForwardOnly bool, isLastAlignEndNanoTS bool, ...) (nBlock int)
- func IntegrateComments(boardID bbs.BBoardID, articleID bbs.ArticleID, comments []*schema.Comment, ...) (newComments []*schema.Comment, toDeleteComments []*schema.CommentMD5, ...)
- func MatchComment(content []byte) int
- func MatchCommentType(commentDBCS []byte) (theType ptttype.CommentType, nextCommentDBCS []byte)
- func ParseComments(ownerID bbs.UUserID, commentsDBCS []byte, allCommentsDBCS []byte) (comments []*schema.Comment)
- func ParseContent(contentBytes []byte, origContentMD5 string) (content [][]*types.Rune, contentMD5 string, ip string, host string, bbs string, ...)
- func ParseFirstComments(bboardID bbs.BBoardID, articleID bbs.ArticleID, ownerID bbs.UUserID, ...) (firstComments []*schema.Comment, firstCommentsMD5 string, ...)
- func Utf8ToDBCS(utf8 [][]*types.Rune) (dbcs [][]byte)
- type DBCSState
- type EDBlock
- type EDInfo
- type EDInfoMeta
- type EDOp
- type INFER_TIMESTAMP_TYPE
Constants ¶
const ( N_FIRST_COMMENTS = 10 COMMENT_STEP_DURATION = 1 * time.Millisecond REPLY_STEP_NANO_TS = 100000 // 0.1 millisecond DELETE_STEP_NANO_TS = 10000 // 0.01 millisecond COMMENT_STEP_NANO_TS = 1000000 // 1 millisecond COMMENT_EXCEED_NANO_TS = 1000 // 0.001 millisecond COMMENT_STEP_DIFF_NANO_TS = 2 * 60 * types.TS_TO_NANO_TS COMMENT_STEP_DIFF2_NANO_TS = 2 * 86400 * types.TS_TO_NANO_TS COMMENT_BACKWARD_OFFSET_NANO_TS = 900000000 // 900 millisecond COMMENT_DIFF_ALIGN_END_NANO_TS = 60 * types.TS_TO_NANO_TS COMMENT_DIFF2_ALIGN_END_NANO_TS = 86400 * types.TS_TO_NANO_TS DEFAULT_LINE_BYTES = 200 LEN_OLD_RECOMMEND_DATE = 5 LEN_RECOMMEND_DATE = 11 ONE_YEAR_OFFSET_NANO_TS = 365 * 86400 * types.TS_TO_NANO_TS )
Variables ¶
var ( MATCH_COMMENT_RECOMMEND_BYTES = []byte{ 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x37, 0x6d, 0xb1, 0xc0, 0x20, 0x1b, 0x5b, 0x33, 0x33, 0x6d, } MATCH_COMMENT_BOO_BYTES = []byte{ 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x31, 0x6d, 0xbc, 0x4e, 0x20, 0x1b, 0x5b, 0x33, 0x33, 0x6d, } MATCH_COMMENT_ARROW_BYTES = []byte{ 0x1b, 0x5b, 0x31, 0x3b, 0x33, 0x31, 0x6d, 0xa1, 0xf7, 0x20, 0x1b, 0x5b, 0x33, 0x33, 0x6d, } //※ 編輯: abcd (1.2.3.4 臺灣), 03/21/2021 03:04:47 //\xa1\xb0 \xbds\xbf\xe8: abcd (1.2.3.4 \xbbO\xc6W), 03/18/2021 12:07:22 MATCH_COMMENT_EDIT_BYTES = []byte("\xa1\xb0 \xbds\xbf\xe8: ") MATCH_COMMENT_EDIT_FROM_BYTES = []byte("\xa8\xd3\xa6\xdb: ") //※ abcde:轉錄至看板 SYSOP //\xa1\xb0 \x1b[1;32mabcd\x1b[0;32m:\xc2\xe0\xbf\xfd\xa6\xdc\xac\xdd\xaaO Mavericks\x1b[m 03/18 12:07 //※ jasome:轉錄至某隱形看板 //\xa1\xb0 \x1b[1;32mjasome\x1b[0;32m:\xc2\xe0\xbf\xfd\xa6\xdc\xacY\xc1\xf4\xa7\xce\xac\xdd\xaaO\x1b[m 01/29 02:39 MATCH_COMMENT_FORWARD_BYTES = []byte("\x1b[0;32m:\xc2\xe0\xbf\xfd\xa6\xdc") //\x1b[0;32m:\xc2 MATCH_COMMENT_FORWARD_BOARD_BYTES = []byte("\xac\xdd\xaaO ") MATCH_COMMENT_FORWARD_PREFIX = []byte("\xa1\xb0 \x1b[1;32m") //(teemocogs 刪除 teemocogs 的推文: 誤植) //\x1b[1;30m(teemocogs \xa7R\xb0\xa3 teemocogs \xaa\xba\xb1\xc0\xa4\xe5: \xbb~\xb4\xd3)\x1b[m MATCH_COMMENT_DELETED_PREFIX = []byte("\x1b[1;30m(") MATCH_COMMENT_DELETED_INFIX0 = []byte(" \xa7R\xb0\xa3 ") MATCH_COMMENT_DELETED_INFIX1 = []byte(" \xaa\xba\xb1\xc0\xa4\xe5: ") MATCH_COMMENT_DELETED_POSTFIX = []byte(")\x1b[m") MATCH_COMMENT_GREEN_PREFIX = []byte("\xa1\xb0 ") //※ )
var ( MATCH_SIGNATURE_FROM = []byte{ 0x29, 0x2c, 0x20, 0xa8, 0xd3, 0xa6, 0xdb, 0x3a, 0x20, } MATCH_SIGNATURE_FROM_OLD = []byte{ 0xa1, 0xbb, 0x20, 0x46, 0x72, 0x6f, 0x6d, 0x3a, 0x20, } MATCH_SIGNATURE_FORWARD = []byte{ 0xa1, 0xb0, 0x20, 0xc2, 0xe0, 0xbf, 0xfd, 0xaa, 0xcc, 0x3a, 0x20, } MATCH_SIGNATURE_URL = []byte{ 0xa1, 0xb0, 0x20, 0xa4, 0xe5, 0xb3, 0xb9, 0xba, 0xf4, 0xa7, 0x7d, 0x3a, 0x20, } )
var MATCH_SIGNATURE_INIT = []byte{
0x0a, 0xa1, 0xb0, 0x20, 0xb5,
0x6f, 0xab, 0x48, 0xaf, 0xb8, 0x3a, 0x20,
}
Functions ¶
func InferTimestamp ¶ added in v0.15.0
func IntegrateComments ¶ added in v0.15.0
func MatchComment ¶ added in v0.15.0
MatchComment
TODO: record the index of each condition, and rematch only the condition with the smallest index.
func MatchCommentType ¶ added in v0.15.0
func MatchCommentType(commentDBCS []byte) (theType ptttype.CommentType, nextCommentDBCS []byte)
func ParseComments ¶
func ParseComments( ownerID bbs.UUserID, commentsDBCS []byte, allCommentsDBCS []byte, ) (comments []*schema.Comment)
ParseComments
有可能 reply-edit-info (編輯) 不在 commentsDBCS 裡 但是會在 allCommentsDBCS 裡 (firstComments) 只考慮: 1. appropriately split comments. 2. 對於每個 comment 裡的 DBCS Parse 成 Utf8. 3. type / IP / Host / MD5 / TheDate
不考慮: 1. boardID / articleID / commentID. 2. createTime / firstCreateTime / InferredCreateTime / AddCreateTime (除了編輯以外)
- 根據 '\n' 估計 nComments
- 找出 pre-comment reply.
- 對於每個 comment-leading newline for-loop: 3.0. parse comment 3.1. 找下一個 comment 3.1.1. 如果沒有更多 comment: 假設剩下 text 的都是 reply. 3.2. 假設下一個 comment 之前的 text 都是 reply.
- (outside for-loop): 處理最後一個沒有 '\n' 的 comment.
func ParseContent ¶
func ParseContent(contentBytes []byte, origContentMD5 string) (content [][]*types.Rune, contentMD5 string, ip string, host string, bbs string, signatureMD5 string, signatureDBCS []byte, commentsDBCS []byte)
ParseContent
Assume: 1. the content consists only of chars >= 32 plus '\x1b', '\r', and '\n'. 2. the timestamps of the 1st-comments (around 10 comments, including the last-same-min comments) are within 1 year of the createTime. 3. the timestamps of the rest of the comments can be reverse-inferred from mtime, and are compared and stored as nano-ts. 4. there are no more than 60000 comments (60 x 1000) in 1 minute.
func ParseFirstComments ¶
func ParseFirstComments( bboardID bbs.BBoardID, articleID bbs.ArticleID, ownerID bbs.UUserID, articleCreateTime types.NanoTS, articleMTime types.NanoTS, commentsDBCS []byte, origFirstCommentsMD5 string) ( firstComments []*schema.Comment, firstCommentsMD5 string, theRestCommentsDBCS []byte, err error)
ParseFirstComments
Checks against origFirstCommentsMD5: if it already exists, returns nil, and the caller is then required to get firstComments and lastTime from the db.
func Utf8ToDBCS ¶ added in v0.15.0
Types ¶
type EDBlock ¶ added in v0.15.0
type EDBlock struct { NewComments []*EDInfo OrigComments []*EDInfo StartNanoTS types.NanoTS EndNanoTS types.NanoTS }
func CalcEDBlocks ¶ added in v0.15.0
func CalcEDBlocks(newComments []*schema.Comment, origComments []*schema.CommentMD5, articleCreateTime types.NanoTS, articleMTime types.NanoTS) (edBlocks []*EDBlock, err error)
CalcEDBlocks
The caller must already guarantee that: 1. articleCreateTime < all origComments.SortTime 2. articleMTime >= all origComments.SortTime 3. origComments are sorted by SortTime 4. newComments are sorted by the line-idx.
func (*EDBlock) AlignEndNanoTS ¶ added in v0.15.0
func (ed *EDBlock) AlignEndNanoTS()
func (*EDBlock) BackwardInferTS ¶ added in v0.15.0
BackwardInferTS
func (*EDBlock) ForwardInferTS ¶ added in v0.15.0
ForwardInferTS
func (*EDBlock) InferTimestamp ¶ added in v0.15.0
func (ed *EDBlock) InferTimestamp(articleCreateTime types.NanoTS, isForwardOnly bool, isLastAlignEndNanoTS bool)
InferTimestamp
- OrigComments are sorted between ed.StartNanoTS and ed.EndNanoTS
- It's possible that the newComments have out-of-range times.
- It's possible that multiple comments share the same date-str, but we still need some way to make the timestamps unique.
- The time from OrigComments should not be moved.
The possibilities that new-comments are in between original-comments: XXX 1. delete (try to map the corresponding deleted messages)
We don't do this, in order to simplify the mapping sequence.
2. reply (previous-appearing-message (currentNanoTS in same or newComments) + REPLY_STEP_NANO_TS) 3. new messages. (sort-time should be after the deleted-messages) 4. others (the owners accidentally edited something, sort-time should be after the deleted-messages)
type EDInfo ¶ added in v0.15.0
type EDInfo struct { Op EDOp NewComment *schema.Comment // SAME/DELETE: origComments, ADD: newComments OrigComment *schema.CommentMD5 SortTime types.NanoTS }
func NewEDInfoFromAddComment ¶ added in v0.15.0
func NewEDInfoFromDeleteComment ¶ added in v0.15.0
func NewEDInfoFromDeleteComment(commentMD5 *schema.CommentMD5) (edInfo *EDInfo)
func NewEDInfoFromSameComment ¶ added in v0.15.0
func NewEDInfoFromSameComment(newComment *schema.Comment, origCommentMD5 *schema.CommentMD5) (edInfo *EDInfo)
type EDInfoMeta ¶ added in v0.15.0
type EDInfoMeta struct { // StartNanoTS (not included) StartNanoTS types.NanoTS // EndNanoTS (not included except the last ed-info) EndNanoTS types.NanoTS // StartIdx (included) StartIdx int // EndIdx (not included) EndIdx int }
func (*EDInfoMeta) ToEDBlock ¶ added in v0.15.0
func (meta *EDInfoMeta) ToEDBlock(edInfos []*EDInfo) (edBlock *EDBlock)
ToEDBlock
Given the list of edInfos, where NewComments and OrigComments are already separately sorted, construct the corresponding ed-block.
type INFER_TIMESTAMP_TYPE ¶ added in v0.15.0
type INFER_TIMESTAMP_TYPE uint8
const ( INFER_TIMESTAMP_INVALID INFER_TIMESTAMP_TYPE = 0 INFER_TIMESTAMP_YMDHM INFER_TIMESTAMP_TYPE = 1 INFER_TIMESTAMP_YMD INFER_TIMESTAMP_TYPE = 2 )