diff --git a/cdc/api/open.go b/cdc/api/open.go index 7091a5c397d..c2c8040591b 100644 --- a/cdc/api/open.go +++ b/cdc/api/open.go @@ -151,7 +151,7 @@ func (h *openAPI) ListChangefeed(c *gin.Context) { } resp := &model.ChangefeedCommonInfo{ - ID: cfID, + //ID: cfID, } if cfInfo != nil { @@ -181,60 +181,60 @@ func (h *openAPI) ListChangefeed(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/changefeeds/{changefeed_id} [get] func (h *openAPI) GetChangefeed(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - changefeedID := c.Param(apiOpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) - return - } - - info, err := h.statusProvider().GetChangeFeedInfo(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - status, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - processorInfos, err := h.statusProvider().GetAllTaskStatuses(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - taskStatus := make([]model.CaptureTaskStatus, 0, len(processorInfos)) - for captureID, status := range processorInfos { - tables := make([]int64, 0) - for tableID := range status.Tables { - tables = append(tables, tableID) - } - taskStatus = append(taskStatus, model.CaptureTaskStatus{CaptureID: captureID, Tables: tables, Operation: status.Operation}) - } - - changefeedDetail := &model.ChangefeedDetail{ - ID: changefeedID, - SinkURI: info.SinkURI, - CreateTime: model.JSONTime(info.CreateTime), - StartTs: info.StartTs, - TargetTs: info.TargetTs, - CheckpointTSO: status.CheckpointTs, - CheckpointTime: model.JSONTime(oracle.GetTimeFromTS(status.CheckpointTs)), - ResolvedTs: status.ResolvedTs, - Engine: info.Engine, - FeedState: info.State, - TaskStatus: taskStatus, - } - - c.IndentedJSON(http.StatusOK, changefeedDetail) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + //changefeedID := c.Param(apiOpVarChangefeedID) + //if err := model.ValidateChangefeedID(changefeedID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) + // return + //} + // + //info, err := h.statusProvider().GetChangeFeedInfo(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //status, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //processorInfos, err := h.statusProvider().GetAllTaskStatuses(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //taskStatus := make([]model.CaptureTaskStatus, 0, len(processorInfos)) + //for captureID, status := range processorInfos { + // tables := make([]int64, 0) + // for tableID := range status.Tables { + // tables = append(tables, tableID) + // } + // taskStatus = append(taskStatus, model.CaptureTaskStatus{CaptureID: captureID, Tables: tables, Operation: status.Operation}) + //} + // + //changefeedDetail := &model.ChangefeedDetail{ + // ID: changefeedID, + // SinkURI: info.SinkURI, + // CreateTime: model.JSONTime(info.CreateTime), + // StartTs: info.StartTs, + // TargetTs: info.TargetTs, + // CheckpointTSO: status.CheckpointTs, + // CheckpointTime: model.JSONTime(oracle.GetTimeFromTS(status.CheckpointTs)), + // ResolvedTs: 
status.ResolvedTs, + // Engine: info.Engine, + // FeedState: info.State, + // TaskStatus: taskStatus, + //} + // + //c.IndentedJSON(http.StatusOK, changefeedDetail) } // CreateChangefeed creates a changefeed @@ -248,38 +248,38 @@ func (h *openAPI) GetChangefeed(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/changefeeds [post] func (h *openAPI) CreateChangefeed(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - var changefeedConfig model.ChangefeedConfig - if err := c.BindJSON(&changefeedConfig); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.Wrap(err)) - return - } - - info, err := verifyCreateChangefeedConfig(c, changefeedConfig, h.capture) - if err != nil { - _ = c.Error(err) - return - } - - infoStr, err := info.Marshal() - if err != nil { - _ = c.Error(err) - return - } - - err = h.capture.EtcdClient.CreateChangefeedInfo(ctx, info, changefeedConfig.ID) - if err != nil { - _ = c.Error(err) - return - } - - log.Info("Create changefeed successfully!", zap.String("id", changefeedConfig.ID), zap.String("changefeed", infoStr)) - c.Status(http.StatusAccepted) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + //var changefeedConfig model.ChangefeedConfig + //if err := c.BindJSON(&changefeedConfig); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.Wrap(err)) + // return + //} + // + //info, err := verifyCreateChangefeedConfig(c, changefeedConfig, h.capture) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //infoStr, err := info.Marshal() + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //err = h.capture.EtcdClient.CreateChangefeedInfo(ctx, info, changefeedConfig.ID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //log.Info("Create changefeed successfully!", zap.String("id", changefeedConfig.ID), zap.String("changefeed", infoStr)) + //c.Status(http.StatusAccepted) } // PauseChangefeed pauses a changefeed @@ -293,35 +293,35 @@ func (h *openAPI) CreateChangefeed(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/changefeeds/{changefeed_id}/pause [post] func (h *openAPI) PauseChangefeed(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - - changefeedID := c.Param(apiOpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) - return - } - // check if the changefeed exists - _, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - job := model.AdminJob{ - CfID: changefeedID, - Type: model.AdminStop, - } - - if err := handleOwnerJob(ctx, h.capture, job); err != nil { - _ = c.Error(err) - return - } - c.Status(http.StatusAccepted) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + // + //changefeedID := c.Param(apiOpVarChangefeedID) + //if err := model.ValidateChangefeedID(changefeedID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) + // return + //} + //// check if the changefeed exists + //_, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //job := model.AdminJob{ + // CfID: changefeedID, + 
// Type: model.AdminStop, + //} + // + //if err := handleOwnerJob(ctx, h.capture, job); err != nil { + // _ = c.Error(err) + // return + //} + //c.Status(http.StatusAccepted) } // ResumeChangefeed resumes a changefeed @@ -335,34 +335,34 @@ func (h *openAPI) PauseChangefeed(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/changefeeds/{changefeed_id}/resume [post] func (h *openAPI) ResumeChangefeed(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - changefeedID := c.Param(apiOpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) - return - } - // check if the changefeed exists - _, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - job := model.AdminJob{ - CfID: changefeedID, - Type: model.AdminResume, - } - - if err := handleOwnerJob(ctx, h.capture, job); err != nil { - _ = c.Error(err) - return - } - c.Status(http.StatusAccepted) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + //changefeedID := c.Param(apiOpVarChangefeedID) + //if err := model.ValidateChangefeedID(changefeedID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) + // return + //} + //// check if the changefeed exists + //_, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //job := model.AdminJob{ + // CfID: changefeedID, + // Type: model.AdminResume, + //} + // + //if err := handleOwnerJob(ctx, h.capture, job); err != nil { + // _ = c.Error(err) + // return + //} + //c.Status(http.StatusAccepted) } // UpdateChangefeed updates a changefeed @@ -382,49 +382,49 @@ func (h *openAPI) ResumeChangefeed(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/changefeeds/{changefeed_id} [put] func (h *openAPI) UpdateChangefeed(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - changefeedID := c.Param(apiOpVarChangefeedID) - - if err := model.ValidateChangefeedID(changefeedID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) - return - } - info, err := h.statusProvider().GetChangeFeedInfo(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - if info.State != model.StateStopped { - _ = c.Error(cerror.ErrChangefeedUpdateRefused.GenWithStackByArgs("can only update changefeed config when it is stopped")) - return - } - - // can only update target-ts, sink-uri - // filter_rules, ignore_txn_start_ts, mounter_worker_num, sink_config - var changefeedConfig model.ChangefeedConfig - if err = c.BindJSON(&changefeedConfig); err != nil { - _ = c.Error(err) - return - } - - newInfo, err := verifyUpdateChangefeedConfig(ctx, changefeedConfig, info) - if err != nil { - _ = c.Error(err) - return - } - - err = h.capture.EtcdClient.SaveChangeFeedInfo(ctx, newInfo, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - c.Status(http.StatusAccepted) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + //changefeedID := c.Param(apiOpVarChangefeedID) + // + //if err := model.ValidateChangefeedID(changefeedID); err != nil { + // _ = 
c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) + // return + //} + //info, err := h.statusProvider().GetChangeFeedInfo(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + //if info.State != model.StateStopped { + // _ = c.Error(cerror.ErrChangefeedUpdateRefused.GenWithStackByArgs("can only update changefeed config when it is stopped")) + // return + //} + // + //// can only update target-ts, sink-uri + //// filter_rules, ignore_txn_start_ts, mounter_worker_num, sink_config + //var changefeedConfig model.ChangefeedConfig + //if err = c.BindJSON(&changefeedConfig); err != nil { + // _ = c.Error(err) + // return + //} + // + //newInfo, err := verifyUpdateChangefeedConfig(ctx, changefeedConfig, info) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //err = h.capture.EtcdClient.SaveChangeFeedInfo(ctx, newInfo, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //c.Status(http.StatusAccepted) } // RemoveChangefeed removes a changefeed @@ -438,34 +438,34 @@ func (h *openAPI) UpdateChangefeed(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/changefeeds/{changefeed_id} [delete] func (h *openAPI) RemoveChangefeed(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - changefeedID := c.Param(apiOpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) - return - } - // check if the changefeed exists - _, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - job := model.AdminJob{ - CfID: changefeedID, - Type: model.AdminRemove, - } - - if err := handleOwnerJob(ctx, h.capture, job); err != nil { - _ = c.Error(err) - return - } - c.Status(http.StatusAccepted) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + //changefeedID := c.Param(apiOpVarChangefeedID) + //if err := model.ValidateChangefeedID(changefeedID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) + // return + //} + //// check if the changefeed exists + //_, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //job := model.AdminJob{ + // CfID: changefeedID, + // Type: model.AdminRemove, + //} + // + //if err := handleOwnerJob(ctx, h.capture, job); err != nil { + // _ = c.Error(err) + // return + //} + //c.Status(http.StatusAccepted) } // RebalanceTables rebalances tables @@ -479,30 +479,30 @@ func (h *openAPI) RemoveChangefeed(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/changefeeds/{changefeed_id}/tables/rebalance_table [post] func (h *openAPI) RebalanceTables(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - changefeedID := c.Param(apiOpVarChangefeedID) - - if err := model.ValidateChangefeedID(changefeedID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) - return - } - // check if the changefeed exists - _, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - if err := handleOwnerRebalance(ctx, h.capture, 
changefeedID); err != nil { - _ = c.Error(err) - return - } - c.Status(http.StatusAccepted) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + //changefeedID := c.Param(apiOpVarChangefeedID) + // + //if err := model.ValidateChangefeedID(changefeedID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) + // return + //} + //// check if the changefeed exists + //_, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //if err := handleOwnerRebalance(ctx, h.capture, changefeedID); err != nil { + // _ = c.Error(err) + // return + //} + //c.Status(http.StatusAccepted) } // MoveTable moves a table to target capture @@ -518,46 +518,46 @@ func (h *openAPI) RebalanceTables(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/changefeeds/{changefeed_id}/tables/move_table [post] func (h *openAPI) MoveTable(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - changefeedID := c.Param(apiOpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) - return - } - // check if the changefeed exists - _, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - - data := struct { - CaptureID string `json:"capture_id"` - TableID int64 `json:"table_id"` - }{} - err = c.BindJSON(&data) - if err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.Wrap(err)) - return - } - - if err := model.ValidateChangefeedID(data.CaptureID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid capture_id: %s", data.CaptureID)) - return - } - - err = handleOwnerScheduleTable( - ctx, h.capture, changefeedID, data.CaptureID, data.TableID) - if err != nil { - _ = c.Error(err) - return - } - c.Status(http.StatusAccepted) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + //changefeedID := c.Param(apiOpVarChangefeedID) + //if err := model.ValidateChangefeedID(changefeedID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) + // return + //} + //// check if the changefeed exists + //_, err := h.statusProvider().GetChangeFeedStatus(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + // + //data := struct { + // CaptureID string `json:"capture_id"` + // TableID int64 `json:"table_id"` + //}{} + //err = c.BindJSON(&data) + //if err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.Wrap(err)) + // return + //} + // + //if err := model.ValidateChangefeedID(data.CaptureID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid capture_id: %s", data.CaptureID)) + // return + //} + // + //err = handleOwnerScheduleTable( + // ctx, h.capture, changefeedID, data.CaptureID, data.TableID) + //if err != nil { + // _ = c.Error(err) + // return + //} + //c.Status(http.StatusAccepted) } // ResignOwner makes the current owner resign @@ -593,73 +593,73 @@ func (h *openAPI) ResignOwner(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/processors/{changefeed_id}/{capture_id} [get] func (h *openAPI) GetProcessor(c *gin.Context) { - if !h.capture.IsOwner() { - 
h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - - changefeedID := c.Param(apiOpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) - return - } - - captureID := c.Param(apiOpVarCaptureID) - if err := model.ValidateChangefeedID(captureID); err != nil { - _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid capture_id: %s", captureID)) - return - } - - // check if this captureID exist - procInfos, err := h.statusProvider().GetProcessors(ctx) - if err != nil { - _ = c.Error(err) - return - } - var found bool - for _, info := range procInfos { - if info.CaptureID == captureID { - found = true - break - } - } - if !found { - _ = c.Error(cerror.ErrCaptureNotExist.GenWithStackByArgs(captureID)) - return - } - - statuses, err := h.statusProvider().GetAllTaskStatuses(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - status, captureExist := statuses[captureID] - - positions, err := h.statusProvider().GetTaskPositions(ctx, changefeedID) - if err != nil { - _ = c.Error(err) - return - } - position, positionsExist := positions[captureID] - // Note: for the case that no tables are attached to a newly created changefeed, - // we just do not report an error. - var processorDetail model.ProcessorDetail - if captureExist && positionsExist { - processorDetail = model.ProcessorDetail{ - CheckPointTs: position.CheckPointTs, - ResolvedTs: position.ResolvedTs, - Count: position.Count, - Error: position.Error, - } - tables := make([]int64, 0) - for tableID := range status.Tables { - tables = append(tables, tableID) - } - processorDetail.Tables = tables - } - c.IndentedJSON(http.StatusOK, &processorDetail) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + // + //changefeedID := c.Param(apiOpVarChangefeedID) + //if err := model.ValidateChangefeedID(changefeedID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedID)) + // return + //} + // + //captureID := c.Param(apiOpVarCaptureID) + //if err := model.ValidateChangefeedID(captureID); err != nil { + // _ = c.Error(cerror.ErrAPIInvalidParam.GenWithStack("invalid capture_id: %s", captureID)) + // return + //} + // + //// check if this captureID exist + //procInfos, err := h.statusProvider().GetProcessors(ctx) + //if err != nil { + // _ = c.Error(err) + // return + //} + //var found bool + //for _, info := range procInfos { + // if info.CaptureID == captureID { + // found = true + // break + // } + //} + //if !found { + // _ = c.Error(cerror.ErrCaptureNotExist.GenWithStackByArgs(captureID)) + // return + //} + // + //statuses, err := h.statusProvider().GetAllTaskStatuses(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + //status, captureExist := statuses[captureID] + // + //positions, err := h.statusProvider().GetTaskPositions(ctx, changefeedID) + //if err != nil { + // _ = c.Error(err) + // return + //} + //position, positionsExist := positions[captureID] + //// Note: for the case that no tables are attached to a newly created changefeed, + //// we just do not report an error. 
+ //var processorDetail model.ProcessorDetail + //if captureExist && positionsExist { + // processorDetail = model.ProcessorDetail{ + // CheckPointTs: position.CheckPointTs, + // ResolvedTs: position.ResolvedTs, + // Count: position.Count, + // Error: position.Error, + // } + // tables := make([]int64, 0) + // for tableID := range status.Tables { + // tables = append(tables, tableID) + // } + // processorDetail.Tables = tables + //} + //c.IndentedJSON(http.StatusOK, &processorDetail) } // ListProcessor lists all processors in the TiCDC cluster @@ -672,23 +672,23 @@ func (h *openAPI) GetProcessor(c *gin.Context) { // @Failure 500,400 {object} model.HTTPError // @Router /api/v1/processors [get] func (h *openAPI) ListProcessor(c *gin.Context) { - if !h.capture.IsOwner() { - h.forwardToOwner(c) - return - } - - ctx := c.Request.Context() - infos, err := h.statusProvider().GetProcessors(ctx) - if err != nil { - _ = c.Error(err) - return - } - resps := make([]*model.ProcessorCommonInfo, len(infos)) - for i, info := range infos { - resp := &model.ProcessorCommonInfo{CfID: info.CfID, CaptureID: info.CaptureID} - resps[i] = resp - } - c.IndentedJSON(http.StatusOK, resps) + //if !h.capture.IsOwner() { + // h.forwardToOwner(c) + // return + //} + // + //ctx := c.Request.Context() + //infos, err := h.statusProvider().GetProcessors(ctx) + //if err != nil { + // _ = c.Error(err) + // return + //} + //resps := make([]*model.ProcessorCommonInfo, len(infos)) + //for i, info := range infos { + // resp := &model.ProcessorCommonInfo{CfID: info.CfID, CaptureID: info.CaptureID} + // resps[i] = resp + //} + //c.IndentedJSON(http.StatusOK, resps) } // ListCapture lists all captures diff --git a/cdc/api/owner.go b/cdc/api/owner.go index c11a9cd10df..94bf3b895d6 100644 --- a/cdc/api/owner.go +++ b/cdc/api/owner.go @@ -148,7 +148,7 @@ func (h *ownerAPI) handleChangefeedAdmin(w http.ResponseWriter, req *http.Reques opts.ForceRemove = forceRemoveOpt } job := model.AdminJob{ - CfID: req.Form.Get(OpVarChangefeedID), + CfID: model.ChangeFeedID{req.Form.Get("namespace"), req.Form.Get(OpVarChangefeedID)}, Type: model.AdminJobType(typ), Opts: opts, } @@ -169,8 +169,8 @@ func (h *ownerAPI) handleRebalanceTrigger(w http.ResponseWriter, req *http.Reque writeError(w, http.StatusInternalServerError, err) return } - changefeedID := req.Form.Get(OpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { + changefeedID := model.ChangeFeedID{"default", req.Form.Get(OpVarChangefeedID)} + if err := model.ValidateChangefeedID(changefeedID.ID); err != nil { writeError(w, http.StatusBadRequest, cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed id: %s", changefeedID)) return @@ -193,8 +193,8 @@ func (h *ownerAPI) handleMoveTable(w http.ResponseWriter, req *http.Request) { cerror.WrapError(cerror.ErrInternalServerError, err)) return } - changefeedID := req.Form.Get(OpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { + changefeedID := model.ChangeFeedID{"default", req.Form.Get(OpVarChangefeedID)} + if err := model.ValidateChangefeedID(changefeedID.ID); err != nil { writeError(w, http.StatusBadRequest, cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed id: %s", changefeedID)) return @@ -230,8 +230,8 @@ func (h *ownerAPI) handleChangefeedQuery(w http.ResponseWriter, req *http.Reques writeError(w, http.StatusInternalServerError, err) return } - changefeedID := req.Form.Get(OpVarChangefeedID) - if err := model.ValidateChangefeedID(changefeedID); err != nil { + 
changefeedID := model.ChangeFeedID{req.Form.Get("namespace"), req.Form.Get(OpVarChangefeedID)} + if err := model.ValidateChangefeedID(changefeedID.ID); err != nil { writeError(w, http.StatusBadRequest, cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed id: %s", changefeedID)) return diff --git a/cdc/api/status.go b/cdc/api/status.go index 79d9da7537d..8e9b7aa80e1 100644 --- a/cdc/api/status.go +++ b/cdc/api/status.go @@ -48,7 +48,7 @@ func RegisterStatusAPIRoutes(router *gin.Engine, capture *capture.Capture) { } func (h *statusAPI) writeEtcdInfo(ctx context.Context, cli *etcd.CDCEtcdClient, w io.Writer) { - resp, err := cli.Client.Get(ctx, etcd.EtcdKeyBase, clientv3.WithPrefix()) + resp, err := cli.Client.Get(ctx, etcd.EtcdKeyBase(), clientv3.WithPrefix()) if err != nil { fmt.Fprintf(w, "failed to get info: %s\n\n", err.Error()) return diff --git a/cdc/api/util.go b/cdc/api/util.go index 7f9650972a0..a6eba74a128 100644 --- a/cdc/api/util.go +++ b/cdc/api/util.go @@ -99,7 +99,7 @@ func handleOwnerJob( } func handleOwnerRebalance( - ctx context.Context, capture *capture.Capture, changefeedID string, + ctx context.Context, capture *capture.Capture, changefeedID model.ChangeFeedID, ) error { // Use buffered channel to prevernt blocking owner. done := make(chan error, 1) @@ -118,7 +118,7 @@ func handleOwnerRebalance( func handleOwnerScheduleTable( ctx context.Context, capture *capture.Capture, - changefeedID string, captureID string, tableID int64, + changefeedID model.ChangeFeedID, captureID string, tableID int64, ) error { // Use buffered channel to prevernt blocking owner. done := make(chan error, 1) diff --git a/cdc/api/validator.go b/cdc/api/validator.go index f2f053e0f17..22edd111e41 100644 --- a/cdc/api/validator.go +++ b/cdc/api/validator.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/log" tidbkv "github.com/pingcap/tidb/kv" "github.com/pingcap/tiflow/cdc/capture" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/entry/schema" "github.com/pingcap/tiflow/cdc/kv" "github.com/pingcap/tiflow/cdc/model" @@ -28,7 +29,7 @@ import ( "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/filter" - "github.com/pingcap/tiflow/pkg/txnutil/gc" + "github.com/pingcap/tiflow/pkg/upstream" "github.com/pingcap/tiflow/pkg/util" "github.com/pingcap/tiflow/pkg/version" "github.com/r3labs/diff" @@ -41,6 +42,13 @@ func verifyCreateChangefeedConfig( changefeedConfig model.ChangefeedConfig, capture *capture.Capture, ) (*model.ChangeFeedInfo, error) { + upStream, err := upstream.UpStreamManager.Get("") + defer upstream.UpStreamManager.Release(0) + + if err != nil { + return nil, errors.Trace(err) + } + // verify sinkURI if changefeedConfig.SinkURI == "" { return nil, cerror.ErrSinkURIInvalid.GenWithStackByArgs("sink-uri is empty, can't not create a changefeed without sink-uri") @@ -51,17 +59,17 @@ func verifyCreateChangefeedConfig( return nil, cerror.ErrAPIInvalidParam.GenWithStack("invalid changefeed_id: %s", changefeedConfig.ID) } // check if the changefeed exists - cfStatus, err := capture.StatusProvider().GetChangeFeedStatus(ctx, changefeedConfig.ID) - if err != nil && cerror.ErrChangeFeedNotExists.NotEqual(err) { - return nil, err - } - if cfStatus != nil { - return nil, cerror.ErrChangeFeedAlreadyExists.GenWithStackByArgs(changefeedConfig.ID) - } + //cfStatus, err := capture.StatusProvider().GetChangeFeedStatus(ctx, changefeedConfig.ID) + //if err != nil && 
cerror.ErrChangeFeedNotExists.NotEqual(err) { + // return nil, err + //} + //if cfStatus != nil { + // return nil, cerror.ErrChangeFeedAlreadyExists.GenWithStackByArgs(changefeedConfig.ID) + //} // verify start-ts if changefeedConfig.StartTS == 0 { - ts, logical, err := capture.PDClient.GetTS(ctx) + ts, logical, err := upStream.PDClient.GetTS(ctx) if err != nil { return nil, cerror.ErrPDEtcdAPIError.GenWithStackByArgs("fail to get ts from pd client") } @@ -70,13 +78,13 @@ func verifyCreateChangefeedConfig( // Ensure the start ts is valid in the next 1 hour. const ensureTTL = 60 * 60 - if err := gc.EnsureChangefeedStartTsSafety( - ctx, capture.PDClient, changefeedConfig.ID, ensureTTL, changefeedConfig.StartTS); err != nil { - if !cerror.ErrStartTsBeforeGC.Equal(err) { - return nil, cerror.ErrPDEtcdAPIError.Wrap(err) - } - return nil, err - } + //if err := gc.EnsureChangefeedStartTsSafety( + // ctx, upStream.PDClient, changefeedConfig.ID, ensureTTL, changefeedConfig.StartTS); err != nil { + // if !cerror.ErrStartTsBeforeGC.Equal(err) { + // return nil, cerror.ErrPDEtcdAPIError.Wrap(err) + // } + // return nil, err + //} // verify target-ts if changefeedConfig.TargetTS > 0 && changefeedConfig.TargetTS <= changefeedConfig.StartTS { @@ -133,7 +141,7 @@ func verifyCreateChangefeedConfig( } if !replicaConfig.ForceReplicate && !changefeedConfig.IgnoreIneligibleTable { - ineligibleTables, _, err := VerifyTables(replicaConfig, capture.Storage, changefeedConfig.StartTS) + ineligibleTables, _, err := VerifyTables(replicaConfig, upStream.KVStorage, changefeedConfig.StartTS) if err != nil { return nil, err } @@ -146,7 +154,7 @@ func verifyCreateChangefeedConfig( if err != nil { return nil, cerror.ErrAPIInvalidParam.Wrap(errors.Annotatef(err, "invalid timezone:%s", changefeedConfig.TimeZone)) } - ctx = util.PutTimezoneInCtx(ctx, tz) + ctx = contextutil.PutTimezoneInCtx(ctx, tz) if err := sink.Validate(ctx, info.SinkURI, info.Config, info.Opts); err != nil { return nil, err } diff --git a/cdc/capture/capture.go b/cdc/capture/capture.go index 7daaa5ba027..4d8b12301b5 100644 --- a/cdc/capture/capture.go +++ b/cdc/capture/capture.go @@ -25,14 +25,12 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/log" tidbkv "github.com/pingcap/tidb/kv" - "github.com/tikv/client-go/v2/tikv" pd "github.com/tikv/pd/client" "go.etcd.io/etcd/client/v3/concurrency" "go.etcd.io/etcd/server/v3/mvcc" "go.uber.org/zap" "golang.org/x/time/rate" - "github.com/pingcap/tiflow/cdc/kv" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/owner" "github.com/pingcap/tiflow/cdc/processor" @@ -44,7 +42,6 @@ import ( "github.com/pingcap/tiflow/pkg/etcd" "github.com/pingcap/tiflow/pkg/orchestrator" "github.com/pingcap/tiflow/pkg/p2p" - "github.com/pingcap/tiflow/pkg/pdtime" "github.com/pingcap/tiflow/pkg/version" ) @@ -62,12 +59,12 @@ type Capture struct { session *concurrency.Session election *concurrency.Election - PDClient pd.Client - Storage tidbkv.Storage - EtcdClient *etcd.CDCEtcdClient - grpcPool kv.GrpcPool - regionCache *tikv.RegionCache - pdClock *pdtime.PDClock + // PDClient pd.Client + // Storage tidbkv.Storage + EtcdClient *etcd.CDCEtcdClient + // grpcPool kv.GrpcPool + // regionCache *tikv.RegionCache + // pdClock *pdtime.PDClock sorterSystem *ssystem.System enableNewScheduler bool @@ -90,15 +87,15 @@ type Capture struct { cancel context.CancelFunc newProcessorManager func() *processor.Manager - newOwner func(pd.Client) owner.Owner + newOwner func() 
owner.Owner } // NewCapture returns a new Capture instance func NewCapture(pdClient pd.Client, kvStorage tidbkv.Storage, etcdClient *etcd.CDCEtcdClient, grpcService *p2p.ServerWrapper) *Capture { conf := config.GetGlobalServerConfig() return &Capture{ - PDClient: pdClient, - Storage: kvStorage, + // PDClient: pdClient, + // Storage: kvStorage, EtcdClient: etcdClient, grpcService: grpcService, cancel: func() {}, @@ -141,16 +138,16 @@ func (c *Capture) reset(ctx context.Context) error { _ = c.session.Close() } c.session = sess - c.election = concurrency.NewElection(sess, etcd.CaptureOwnerKey) + c.election = concurrency.NewElection(sess, etcd.CaptureOwnerKey()) - if c.pdClock != nil { - c.pdClock.Stop() - } + // if c.pdClock != nil { + // c.pdClock.Stop() + // } - c.pdClock, err = pdtime.NewClock(ctx, c.PDClient) - if err != nil { - return errors.Trace(err) - } + // c.pdClock, err = pdtime.NewClock(ctx, c.PDClient) + // if err != nil { + // return errors.Trace(err) + // } if c.tableActorSystem != nil { c.tableActorSystem.Stop() @@ -183,9 +180,9 @@ func (c *Capture) reset(ctx context.Context) error { "create sorter system") } } - if c.grpcPool != nil { - c.grpcPool.Close() - } + // if c.grpcPool != nil { + // c.grpcPool.Close() + // } if c.enableNewScheduler { c.grpcService.Reset(nil) @@ -197,11 +194,11 @@ func (c *Capture) reset(ctx context.Context) error { } } - c.grpcPool = kv.NewGrpcPoolImpl(ctx, conf.Security) - if c.regionCache != nil { - c.regionCache.Close() - } - c.regionCache = tikv.NewRegionCache(c.PDClient) + // c.grpcPool = kv.NewGrpcPoolImpl(ctx, conf.Security) + // if c.regionCache != nil { + // c.regionCache.Close() + // } + // c.regionCache = tikv.NewRegionCache(c.PDClient) if c.enableNewScheduler { messageServerConfig := conf.Debug.Messages.ToMessageServerConfig() @@ -265,13 +262,13 @@ func (c *Capture) Run(ctx context.Context) error { func (c *Capture) run(stdCtx context.Context) error { ctx := cdcContext.NewContext(stdCtx, &cdcContext.GlobalVars{ - PDClient: c.PDClient, - KVStorage: c.Storage, - CaptureInfo: c.info, - EtcdClient: c.EtcdClient, - GrpcPool: c.grpcPool, - RegionCache: c.regionCache, - PDClock: c.pdClock, + // PDClient: c.PDClient, + // KVStorage: c.Storage, + CaptureInfo: c.info, + EtcdClient: c.EtcdClient, + // GrpcPool: c.grpcPool, + // RegionCache: c.regionCache, + // PDClock: c.pdClock, TableActorSystem: c.tableActorSystem, SorterSystem: c.sorterSystem, MessageServer: c.MessageServer, @@ -324,16 +321,16 @@ func (c *Capture) run(stdCtx context.Context) error { processorErr = c.runEtcdWorker(ctx, c.processorManager, globalState, processorFlushInterval, "processor") log.Info("the processor routine has exited", zap.Error(processorErr)) }() - wg.Add(1) - go func() { - defer wg.Done() - c.pdClock.Run(ctx) - }() - wg.Add(1) - go func() { - defer wg.Done() - c.grpcPool.RecycleConn(ctx) - }() + // wg.Add(1) + // go func() { + // defer wg.Done() + // c.pdClock.Run(ctx) + // }() + // wg.Add(1) + // go func() { + // defer wg.Done() + // c.grpcPool.RecycleConn(ctx) + // }() if c.enableNewScheduler { wg.Add(1) go func() { @@ -419,7 +416,7 @@ func (c *Capture) campaignOwner(ctx cdcContext.Context) error { zap.String("captureID", c.info.ID), zap.Int64("ownerRev", ownerRev)) - owner := c.newOwner(c.PDClient) + owner := c.newOwner() c.setOwner(owner) globalState := orchestrator.NewGlobalState() @@ -456,7 +453,7 @@ func (c *Capture) runEtcdWorker( timerInterval time.Duration, role string, ) error { - etcdWorker, err := 
orchestrator.NewEtcdWorker(ctx.GlobalVars().EtcdClient.Client, etcd.EtcdKeyBase, reactor, reactorState) + etcdWorker, err := orchestrator.NewEtcdWorker(ctx.GlobalVars().EtcdClient.Client, etcd.EtcdKeyBase(), reactor, reactorState) if err != nil { return errors.Trace(err) } @@ -541,13 +538,13 @@ func (c *Capture) AsyncClose() { if c.processorManager != nil { c.processorManager.AsyncClose() } - if c.grpcPool != nil { - c.grpcPool.Close() - } - if c.regionCache != nil { - c.regionCache.Close() - c.regionCache = nil - } + // if c.grpcPool != nil { + // c.grpcPool.Close() + // } + // if c.regionCache != nil { + // c.regionCache.Close() + // c.regionCache = nil + // } if c.tableActorSystem != nil { c.tableActorSystem.Stop() c.tableActorSystem = nil @@ -622,7 +619,7 @@ func (c *Capture) GetOwnerCaptureInfo(ctx context.Context) (*model.CaptureInfo, return nil, err } - ownerID, err := c.EtcdClient.GetOwnerID(ctx, etcd.CaptureOwnerKey) + ownerID, err := c.EtcdClient.GetOwnerID(ctx, etcd.CaptureOwnerKey()) if err != nil { return nil, err } diff --git a/pkg/util/ctx.go b/cdc/contextutil/ctx.go similarity index 87% rename from pkg/util/ctx.go rename to cdc/contextutil/ctx.go index 4f68af2cf33..65d4e2aba3d 100644 --- a/pkg/util/ctx.go +++ b/cdc/contextutil/ctx.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package util +package contextutil import ( "context" @@ -19,6 +19,8 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tiflow/cdc/model" + "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" ) @@ -109,31 +111,31 @@ func IsOwnerFromCtx(ctx context.Context) bool { // ChangefeedIDFromCtx returns a changefeedID stored in the specified context. // It returns an empty string if there's no valid changefeed ID found. -func ChangefeedIDFromCtx(ctx context.Context) string { - changefeedID, ok := ctx.Value(ctxKeyChangefeedID).(string) +func ChangefeedIDFromCtx(ctx context.Context) model.ChangeFeedID { + changefeedID, ok := ctx.Value(ctxKeyChangefeedID).(model.ChangeFeedID) if !ok { - return "" + return model.ChangeFeedID{} } return changefeedID } // PutChangefeedIDInCtx returns a new child context with the specified changefeed ID stored. -func PutChangefeedIDInCtx(ctx context.Context, changefeedID string) context.Context { +func PutChangefeedIDInCtx(ctx context.Context, changefeedID model.ChangeFeedID) context.Context { return context.WithValue(ctx, ctxKeyChangefeedID, changefeedID) } // RoleFromCtx returns a role stored in the specified context. // It returns RoleUnknown if there's no valid role found -func RoleFromCtx(ctx context.Context) Role { - role, ok := ctx.Value(ctxKeyRole).(Role) +func RoleFromCtx(ctx context.Context) util.Role { + role, ok := ctx.Value(ctxKeyRole).(util.Role) if !ok { - return RoleUnknown + return util.RoleUnknown } return role } // PutRoleInCtx return a new child context with the specified role stored. 
-func PutRoleInCtx(ctx context.Context, role Role) context.Context { +func PutRoleInCtx(ctx context.Context, role util.Role) context.Context { return context.WithValue(ctx, ctxKeyRole, role) } @@ -145,5 +147,5 @@ func ZapFieldCapture(ctx context.Context) zap.Field { // ZapFieldChangefeed returns a zap field containing changefeed id func ZapFieldChangefeed(ctx context.Context) zap.Field { - return zap.String("changefeed", ChangefeedIDFromCtx(ctx)) + return zap.String("changefeed", ChangefeedIDFromCtx(ctx).String()) } diff --git a/pkg/util/ctx_test.go b/cdc/contextutil/ctx_test.go similarity index 97% rename from pkg/util/ctx_test.go rename to cdc/contextutil/ctx_test.go index 58bec96643c..0beba9bafc9 100644 --- a/pkg/util/ctx_test.go +++ b/cdc/contextutil/ctx_test.go @@ -11,13 +11,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package util +package contextutil import ( "context" "testing" "github.com/pingcap/tidb/store/mockstore" + "github.com/pingcap/tiflow/pkg/util" "github.com/stretchr/testify/require" "go.uber.org/zap" ) @@ -58,7 +59,7 @@ func TestChangefeedIDNotSet(t *testing.T) { } func TestShouldReturnTimezone(t *testing.T) { - tz, _ := getTimezoneFromZonefile("UTC") + tz, _ := util.GetTimezoneFromZonefile("UTC") ctx := PutTimezoneInCtx(context.Background(), tz) tz = TimezoneFromCtx(ctx) require.Equal(t, "UTC", tz.String()) diff --git a/cdc/entry/schema_storage.go b/cdc/entry/schema_storage.go index 651e80a6617..e3f2a9a29ed 100644 --- a/cdc/entry/schema_storage.go +++ b/cdc/entry/schema_storage.go @@ -127,7 +127,7 @@ func (s *schemaStorageImpl) GetSnapshot(ctx context.Context, ts uint64) (*schema log.Warn("GetSnapshot is taking too long, DDL puller stuck?", zap.Uint64("ts", ts), zap.Duration("duration", now.Sub(startTime)), - zap.String("changefeed", s.id)) + zap.String("changefeed", s.id.String())) logTime = now } return err @@ -161,7 +161,7 @@ func (s *schemaStorageImpl) HandleDDLJob(job *timodel.Job) error { lastSnap := s.snaps[len(s.snaps)-1] if job.BinlogInfo.FinishedTS <= lastSnap.CurrentTs() { log.Info("ignore foregone DDL", zap.Int64("jobID", job.ID), - zap.String("DDL", job.Query), zap.String("changefeed", s.id), + zap.String("DDL", job.Query), zap.String("changefeed", s.id.String()), zap.Uint64("finishTs", job.BinlogInfo.FinishedTS)) return nil } @@ -172,11 +172,11 @@ func (s *schemaStorageImpl) HandleDDLJob(job *timodel.Job) error { if err := snap.HandleDDL(job); err != nil { log.Error("handle DDL failed", zap.String("DDL", job.Query), zap.Stringer("job", job), zap.Error(err), - zap.String("changefeed", s.id), zap.Uint64("finishTs", job.BinlogInfo.FinishedTS)) + zap.String("changefeed", s.id.String()), zap.Uint64("finishTs", job.BinlogInfo.FinishedTS)) return errors.Trace(err) } log.Info("handle DDL", zap.String("DDL", job.Query), - zap.Stringer("job", job), zap.String("changefeed", s.id), + zap.Stringer("job", job), zap.String("changefeed", s.id.String()), zap.Uint64("finishTs", job.BinlogInfo.FinishedTS)) s.snaps = append(s.snaps, snap) @@ -244,11 +244,11 @@ func (s *schemaStorageImpl) DoGC(ts uint64) (lastSchemaTs uint64) { func (s *schemaStorageImpl) skipJob(job *timodel.Job) bool { log.Debug("handle DDL new commit", zap.String("DDL", job.Query), zap.Stringer("job", job), - zap.String("changefeed", s.id)) + zap.String("changefeed", s.id.String())) if s.filter != nil && s.filter.ShouldDiscardDDL(job.Type) { log.Info("discard DDL", zap.Int64("jobID", job.ID), zap.String("DDL", job.Query), - 
zap.String("changefeed", s.id)) + zap.String("changefeed", s.id.String())) return true } return !job.IsSynced() && !job.IsDone() diff --git a/cdc/kv/client.go b/cdc/kv/client.go index bafbfbfa07c..b487f5c1d89 100644 --- a/cdc/kv/client.go +++ b/cdc/kv/client.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" @@ -36,7 +37,6 @@ import ( "github.com/pingcap/tiflow/pkg/regionspan" "github.com/pingcap/tiflow/pkg/retry" "github.com/pingcap/tiflow/pkg/txnutil" - "github.com/pingcap/tiflow/pkg/util" "github.com/pingcap/tiflow/pkg/version" "github.com/prometheus/client_golang/prometheus" tidbkv "github.com/tikv/client-go/v2/kv" @@ -545,7 +545,7 @@ func (s *eventFeedSession) eventFeed(ctx context.Context, ts uint64) error { } }) - tableID, tableName := util.TableIDFromCtx(ctx) + tableID, tableName := contextutil.TableIDFromCtx(ctx) g.Go(func() error { timer := time.NewTimer(defaultCheckRegionRateLimitInterval) defer timer.Stop() @@ -1149,8 +1149,8 @@ func (s *eventFeedSession) receiveFromStream( } }() - changefeedID := util.ChangefeedIDFromCtx(ctx) - metricSendEventBatchResolvedSize := batchResolvedEventSize.WithLabelValues(changefeedID) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) + metricSendEventBatchResolvedSize := batchResolvedEventSize.WithLabelValues(changefeedID.ID) // always create a new region worker, because `receiveFromStream` is ensured // to call exactly once from outter code logic diff --git a/cdc/kv/region_worker.go b/cdc/kv/region_worker.go index 55242b3fc70..66cc83c17b6 100644 --- a/cdc/kv/region_worker.go +++ b/cdc/kv/region_worker.go @@ -25,11 +25,11 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/cdcpb" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/regionspan" - "github.com/pingcap/tiflow/pkg/util" "github.com/pingcap/tiflow/pkg/workerpool" "github.com/prometheus/client_golang/prometheus" "github.com/tikv/client-go/v2/oracle" @@ -176,19 +176,19 @@ func newRegionWorker(s *eventFeedSession, addr string) *regionWorker { } func (w *regionWorker) initMetrics(ctx context.Context) { - changefeedID := util.ChangefeedIDFromCtx(ctx) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) metrics := ®ionWorkerMetrics{} metrics.metricReceivedEventSize = eventSize.WithLabelValues("received") metrics.metricDroppedEventSize = eventSize.WithLabelValues("dropped") - metrics.metricPullEventInitializedCounter = pullEventCounter.WithLabelValues(cdcpb.Event_INITIALIZED.String(), changefeedID) - metrics.metricPullEventCommittedCounter = pullEventCounter.WithLabelValues(cdcpb.Event_COMMITTED.String(), changefeedID) - metrics.metricPullEventCommitCounter = pullEventCounter.WithLabelValues(cdcpb.Event_COMMIT.String(), changefeedID) - metrics.metricPullEventPrewriteCounter = pullEventCounter.WithLabelValues(cdcpb.Event_PREWRITE.String(), changefeedID) - metrics.metricPullEventRollbackCounter = pullEventCounter.WithLabelValues(cdcpb.Event_ROLLBACK.String(), changefeedID) - metrics.metricSendEventResolvedCounter = sendEventCounter.WithLabelValues("native-resolved", 
changefeedID) - metrics.metricSendEventCommitCounter = sendEventCounter.WithLabelValues("commit", changefeedID) - metrics.metricSendEventCommittedCounter = sendEventCounter.WithLabelValues("committed", changefeedID) + metrics.metricPullEventInitializedCounter = pullEventCounter.WithLabelValues(cdcpb.Event_INITIALIZED.String(), changefeedID.ID) + metrics.metricPullEventCommittedCounter = pullEventCounter.WithLabelValues(cdcpb.Event_COMMITTED.String(), changefeedID.ID) + metrics.metricPullEventCommitCounter = pullEventCounter.WithLabelValues(cdcpb.Event_COMMIT.String(), changefeedID.ID) + metrics.metricPullEventPrewriteCounter = pullEventCounter.WithLabelValues(cdcpb.Event_PREWRITE.String(), changefeedID.ID) + metrics.metricPullEventRollbackCounter = pullEventCounter.WithLabelValues(cdcpb.Event_ROLLBACK.String(), changefeedID.ID) + metrics.metricSendEventResolvedCounter = sendEventCounter.WithLabelValues("native-resolved", changefeedID.ID) + metrics.metricSendEventCommitCounter = sendEventCounter.WithLabelValues("commit", changefeedID.ID) + metrics.metricSendEventCommittedCounter = sendEventCounter.WithLabelValues("committed", changefeedID.ID) w.metrics = metrics } diff --git a/cdc/kv/token_region.go b/cdc/kv/token_region.go index bbe5e50b079..c064e29487d 100644 --- a/cdc/kv/token_region.go +++ b/cdc/kv/token_region.go @@ -19,7 +19,8 @@ import ( "time" "github.com/pingcap/errors" - "github.com/pingcap/tiflow/pkg/util" + "github.com/pingcap/tiflow/cdc/contextutil" + "github.com/pingcap/tiflow/cdc/model" "github.com/prometheus/client_golang/prometheus" ) @@ -47,7 +48,7 @@ type LimitRegionRouter interface { // srrMetrics keeps metrics of a Sized Region Router type srrMetrics struct { - changefeed string + changefeed model.ChangeFeedID // mapping from id(TiKV store address) to token used tokens map[string]prometheus.Gauge // mapping from id(TiKV store address) to cached regions @@ -55,7 +56,7 @@ type srrMetrics struct { } func newSrrMetrics(ctx context.Context) *srrMetrics { - changefeed := util.ChangefeedIDFromCtx(ctx) + changefeed := contextutil.ChangefeedIDFromCtx(ctx) return &srrMetrics{ changefeed: changefeed, tokens: make(map[string]prometheus.Gauge), @@ -100,7 +101,7 @@ func (r *sizedRegionRouter) AddRegion(sri singleRegionInfo) { } else { r.buffer[id] = append(r.buffer[id], sri) if _, ok := r.metrics.cachedRegions[id]; !ok { - r.metrics.cachedRegions[id] = cachedRegionSize.WithLabelValues(id, r.metrics.changefeed) + r.metrics.cachedRegions[id] = cachedRegionSize.WithLabelValues(id, r.metrics.changefeed.String()) } r.metrics.cachedRegions[id].Inc() } @@ -114,7 +115,7 @@ func (r *sizedRegionRouter) Acquire(id string) { defer r.lock.Unlock() r.tokens[id]++ if _, ok := r.metrics.tokens[id]; !ok { - r.metrics.tokens[id] = clientRegionTokenSize.WithLabelValues(id, r.metrics.changefeed) + r.metrics.tokens[id] = clientRegionTokenSize.WithLabelValues(id, r.metrics.changefeed.String()) } r.metrics.tokens[id].Inc() } @@ -126,7 +127,7 @@ func (r *sizedRegionRouter) Release(id string) { defer r.lock.Unlock() r.tokens[id]-- if _, ok := r.metrics.tokens[id]; !ok { - r.metrics.tokens[id] = clientRegionTokenSize.WithLabelValues(id, r.metrics.changefeed) + r.metrics.tokens[id] = clientRegionTokenSize.WithLabelValues(id, r.metrics.changefeed.String()) } r.metrics.tokens[id].Dec() } diff --git a/cdc/model/changefeed.go b/cdc/model/changefeed.go index 9ea0b338cd6..b8ab9a087f1 100644 --- a/cdc/model/changefeed.go +++ b/cdc/model/changefeed.go @@ -15,6 +15,7 @@ package model import ( 
"encoding/json" + "fmt" "math" "net/url" "regexp" @@ -32,7 +33,21 @@ import ( ) // ChangeFeedID is the type for change feed ID -type ChangeFeedID = string +type ChangeFeedID struct { + Namespace string `json:"namespace"` + ID string `json:"id"` +} + +func NewDefaultChangefeedID(id string) ChangeFeedID { + return ChangeFeedID{ + Namespace: "default", + ID: id, + } +} + +func (c ChangeFeedID) String() string { + return fmt.Sprintf("%s-%s", c.Namespace, c.ID) +} // SortEngine is the sorter engine type SortEngine = string @@ -119,6 +134,8 @@ type ChangeFeedInfo struct { SyncPointEnabled bool `json:"sync-point-enabled"` SyncPointInterval time.Duration `json:"sync-point-interval"` CreatorVersion string `json:"creator-version"` + + UpstreamID string `json:"upstream-id"` } const changeFeedIDMaxLen = 128 diff --git a/cdc/model/owner.go b/cdc/model/owner.go index 0a3c6e937d2..c6debe78094 100644 --- a/cdc/model/owner.go +++ b/cdc/model/owner.go @@ -34,7 +34,7 @@ type AdminJobOption struct { // AdminJob holds an admin job type AdminJob struct { - CfID string + CfID ChangeFeedID Type AdminJobType Opts *AdminJobOption Error *RunningError @@ -388,7 +388,7 @@ func (status *ChangeFeedStatus) Unmarshal(data []byte) error { // ProcInfoSnap holds most important replication information of a processor type ProcInfoSnap struct { - CfID string `json:"changefeed-id"` + CfID ChangeFeedID `json:"changefeed-id"` CaptureID string `json:"capture-id"` Tables map[TableID]*TableReplicaInfo `json:"-"` } diff --git a/cdc/model/upstream.go b/cdc/model/upstream.go new file mode 100644 index 00000000000..2ba5d87436d --- /dev/null +++ b/cdc/model/upstream.go @@ -0,0 +1,35 @@ +package model + +import ( + "encoding/json" + + "github.com/pingcap/errors" + cerror "github.com/pingcap/tiflow/pkg/errors" +) + +type UpstreamID string + +// UpstreamInfo store in etcd. +type UpstreamInfo struct { + PD string `json:"pd"` + KeyPath string `json:"key-path"` + CAPath string `json:"cat-path"` + CertPath string `json:"cert-path"` +} + +// Marshal using json.Marshal. +func (c *UpstreamInfo) Marshal() ([]byte, error) { + data, err := json.Marshal(c) + if err != nil { + return nil, cerror.WrapError(cerror.ErrMarshalFailed, err) + } + + return data, nil +} + +// Unmarshal from binary data. 
+func (c *UpstreamInfo) Unmarshal(data []byte) error { + err := json.Unmarshal(data, c) + return errors.Annotatef(cerror.WrapError(cerror.ErrUnmarshalFailed, err), + "unmarshal data: %v", data) +} diff --git a/cdc/owner/changefeed.go b/cdc/owner/changefeed.go index c0ee763ac2f..96b4f2a5a11 100644 --- a/cdc/owner/changefeed.go +++ b/cdc/owner/changefeed.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/tidb/parser" "github.com/pingcap/tidb/parser/format" timodel "github.com/pingcap/tidb/parser/model" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/redo" schedulerv2 "github.com/pingcap/tiflow/cdc/scheduler" @@ -32,20 +33,21 @@ import ( cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/orchestrator" "github.com/pingcap/tiflow/pkg/txnutil/gc" - "github.com/pingcap/tiflow/pkg/util" + "github.com/pingcap/tiflow/pkg/upstream" "github.com/prometheus/client_golang/prometheus" "github.com/tikv/client-go/v2/oracle" "go.uber.org/zap" ) type changefeed struct { - id model.ChangeFeedID - state *orchestrator.ChangefeedReactorState + id model.ChangeFeedID + clusterID uint64 + state *orchestrator.ChangefeedReactorState + upStream *upstream.UpStream scheduler scheduler barriers *barriers feedStateManager *feedStateManager - gcManager gc.Manager redoManager redo.LogManager schema *schemaWrap4Owner @@ -80,19 +82,19 @@ type changefeed struct { metricsChangefeedResolvedTsLagGauge prometheus.Gauge metricsChangefeedTickDuration prometheus.Observer - newDDLPuller func(ctx cdcContext.Context, startTs uint64) (DDLPuller, error) + newDDLPuller func(ctx cdcContext.Context, upStream *upstream.UpStream, startTs uint64) (DDLPuller, error) newSink func() DDLSink newScheduler func(ctx cdcContext.Context, startTs uint64) (scheduler, error) } -func newChangefeed(id model.ChangeFeedID, gcManager gc.Manager) *changefeed { +func newChangefeed(id model.ChangeFeedID, upStream *upstream.UpStream) *changefeed { c := &changefeed{ id: id, // The scheduler will be created lazily. 
scheduler: nil, barriers: newBarriers(), feedStateManager: newFeedStateManager(), - gcManager: gcManager, + upStream: upStream, errCh: make(chan error, defaultErrChSize), cancel: func() {}, @@ -105,17 +107,22 @@ func newChangefeed(id model.ChangeFeedID, gcManager gc.Manager) *changefeed { } func newChangefeed4Test( - id model.ChangeFeedID, gcManager gc.Manager, - newDDLPuller func(ctx cdcContext.Context, startTs uint64) (DDLPuller, error), + id model.ChangeFeedID, upStream *upstream.UpStream, + newDDLPuller func(ctx cdcContext.Context, upStream *upstream.UpStream, startTs uint64) (DDLPuller, error), newSink func() DDLSink, ) *changefeed { - c := newChangefeed(id, gcManager) + c := newChangefeed(id, upStream) c.newDDLPuller = newDDLPuller c.newSink = newSink return c } func (c *changefeed) Tick(ctx cdcContext.Context, state *orchestrator.ChangefeedReactorState, captures map[model.CaptureID]*model.CaptureInfo) { + // skip this Tick + if c.upStream.IsInitializing() || c.upStream.IsColse() { + return + } + startTime := time.Now() ctx = cdcContext.WithErrorHandler(ctx, func(err error) error { @@ -129,13 +136,13 @@ func (c *changefeed) Tick(ctx cdcContext.Context, state *orchestrator.Changefeed if c.initialized { costTime := time.Since(startTime) if costTime > changefeedLogsWarnDuration { - log.Warn("changefeed tick took too long", zap.String("changefeed", c.id), zap.Duration("duration", costTime)) + log.Warn("changefeed tick took too long", zap.String("changefeed", c.id.String()), zap.Duration("duration", costTime)) } c.metricsChangefeedTickDuration.Observe(costTime.Seconds()) } if err != nil { - log.Error("an error occurred in Owner", zap.String("changefeed", c.state.ID), zap.Error(err)) + log.Error("an error occurred in Owner", zap.String("changefeed", c.state.ID.String()), zap.Error(err)) var code string if rfcCode, ok := cerror.RFCCode(err); ok { code = string(rfcCode) @@ -143,7 +150,7 @@ func (c *changefeed) Tick(ctx cdcContext.Context, state *orchestrator.Changefeed code = string(cerror.ErrOwnerUnknown.RFCCode()) } c.feedStateManager.handleError(&model.RunningError{ - Addr: util.CaptureAddrFromCtx(ctx), + Addr: contextutil.CaptureAddrFromCtx(ctx), Code: code, Message: err.Error(), }) @@ -157,7 +164,7 @@ func (c *changefeed) checkStaleCheckpointTs(ctx cdcContext.Context, checkpointTs failpoint.Inject("InjectChangefeedFastFailError", func() error { return cerror.ErrGCTTLExceeded.FastGen("InjectChangefeedFastFailError") }) - if err := c.gcManager.CheckStaleCheckpointTs(ctx, c.id, checkpointTs); err != nil { + if err := c.upStream.GCManager.CheckStaleCheckpointTs(ctx, c.id, checkpointTs); err != nil { return errors.Trace(err) } } @@ -199,7 +206,7 @@ func (c *changefeed) tick(ctx cdcContext.Context, state *orchestrator.Changefeed if c.currentTableNames == nil { c.currentTableNames = c.schema.AllTableNames() log.Debug("changefeed current table names updated", - zap.String("changefeed", c.id), + zap.String("changefeed", c.id.ID), zap.Any("tables", c.currentTableNames), ) } @@ -214,7 +221,7 @@ func (c *changefeed) tick(ctx cdcContext.Context, state *orchestrator.Changefeed // which implies that it would be premature to schedule tables or to update status. // So we return here. 
log.Debug("barrierTs < checkpointTs, premature to schedule tables or update status", - zap.String("changefeed", c.id), + zap.String("changefeed", c.id.ID), zap.Uint64("barrierTs", barrierTs), zap.Uint64("checkpointTs", checkpointTs)) return nil } @@ -223,13 +230,13 @@ func (c *changefeed) tick(ctx cdcContext.Context, state *orchestrator.Changefeed newCheckpointTs, newResolvedTs, err := c.scheduler.Tick(ctx, c.state, c.schema.AllPhysicalTables(), captures) costTime := time.Since(startTime) if costTime > schedulerLogsWarnDuration { - log.Warn("scheduler tick took too long", zap.String("changefeed", c.id), zap.Duration("duration", costTime)) + log.Warn("scheduler tick took too long", zap.String("changefeed", c.id.String()), zap.Duration("duration", costTime)) } if err != nil { return errors.Trace(err) } - pdTime, _ := ctx.GlobalVars().PDClock.CurrentTime() + pdTime, _ := c.upStream.PDClock.CurrentTime() currentTs := oracle.GetPhysical(pdTime) // CheckpointCannotProceed implies that not all tables are being replicated normally, @@ -267,7 +274,7 @@ LOOP: } } checkpointTs := c.state.Info.GetCheckpointTs(c.state.Status) - log.Info("initialize changefeed", zap.String("changefeed", c.state.ID), + log.Info("initialize changefeed", zap.String("changefeed", c.state.ID.String()), zap.Stringer("info", c.state.Info), zap.Uint64("checkpoint ts", checkpointTs)) failpoint.Inject("NewChangefeedNoRetryError", func() { @@ -289,7 +296,7 @@ LOOP: // See more gc doc. ensureTTL := int64(10 * 60) err := gc.EnsureChangefeedStartTsSafety( - ctx, ctx.GlobalVars().PDClient, c.state.ID, ensureTTL, checkpointTs) + ctx, c.upStream.PDClient, c.state.ID, ensureTTL, checkpointTs) if err != nil { return errors.Trace(err) } @@ -307,12 +314,12 @@ LOOP: // So we need to process all DDLs from the range [checkpointTs, ...), but since the semantics of start-ts requires // the lower bound of an open interval, i.e. (startTs, ...), we pass checkpointTs-1 as the start-ts to initialize // the schema cache. - c.schema, err = newSchemaWrap4Owner(ctx.GlobalVars().KVStorage, + c.schema, err = newSchemaWrap4Owner(c.upStream.KVStorage, checkpointTs-1, c.state.Info.Config, ctx.ChangefeedVars().ID) if err != nil { return errors.Trace(err) } - + c.clusterID = c.upStream.PDClient.GetClusterID(ctx) cancelCtx, cancel := cdcContext.WithCancel(ctx) c.cancel = cancel @@ -320,7 +327,7 @@ LOOP: c.sink.run(cancelCtx, cancelCtx.ChangefeedVars().ID, cancelCtx.ChangefeedVars().Info) // Refer to the previous comment on why we use (checkpointTs-1). 
- c.ddlPuller, err = c.newDDLPuller(cancelCtx, checkpointTs-1) + c.ddlPuller, err = c.newDDLPuller(cancelCtx, c.upStream, checkpointTs-1) if err != nil { return errors.Trace(err) } @@ -330,7 +337,7 @@ LOOP: ctx.Throw(c.ddlPuller.Run(cancelCtx)) }() - stdCtx := util.PutChangefeedIDInCtx(cancelCtx, c.id) + stdCtx := contextutil.PutChangefeedIDInCtx(cancelCtx, c.id) redoManagerOpts := &redo.ManagerOptions{EnableBgRunner: false} redoManager, err := redo.NewManager(stdCtx, c.state.Info.Config.Consistent, redoManagerOpts) if err != nil { @@ -339,12 +346,12 @@ LOOP: c.redoManager = redoManager // init metrics - c.metricsChangefeedBarrierTsGauge = changefeedBarrierTsGauge.WithLabelValues(c.id) - c.metricsChangefeedCheckpointTsGauge = changefeedCheckpointTsGauge.WithLabelValues(c.id) - c.metricsChangefeedCheckpointTsLagGauge = changefeedCheckpointTsLagGauge.WithLabelValues(c.id) - c.metricsChangefeedResolvedTsGauge = changefeedResolvedTsGauge.WithLabelValues(c.id) - c.metricsChangefeedResolvedTsLagGauge = changefeedResolvedTsLagGauge.WithLabelValues(c.id) - c.metricsChangefeedTickDuration = changefeedTickDuration.WithLabelValues(c.id) + c.metricsChangefeedBarrierTsGauge = changefeedBarrierTsGauge.WithLabelValues(c.id.ID) + c.metricsChangefeedCheckpointTsGauge = changefeedCheckpointTsGauge.WithLabelValues(c.id.ID) + c.metricsChangefeedCheckpointTsLagGauge = changefeedCheckpointTsLagGauge.WithLabelValues(c.id.ID) + c.metricsChangefeedResolvedTsGauge = changefeedResolvedTsGauge.WithLabelValues(c.id.ID) + c.metricsChangefeedResolvedTsLagGauge = changefeedResolvedTsLagGauge.WithLabelValues(c.id.ID) + c.metricsChangefeedTickDuration = changefeedTickDuration.WithLabelValues(c.id.ID) // create scheduler c.scheduler, err = c.newScheduler(ctx, checkpointTs) @@ -361,7 +368,7 @@ func (c *changefeed) releaseResources(ctx cdcContext.Context) { c.redoManagerCleanup(ctx) return } - log.Info("close changefeed", zap.String("changefeed", c.state.ID), + log.Info("close changefeed", zap.String("changefeed", c.state.ID.String()), zap.Stringer("info", c.state.Info), zap.Bool("isRemoved", c.isRemoved)) c.cancel() c.cancel = func() {} @@ -372,25 +379,25 @@ func (c *changefeed) releaseResources(ctx cdcContext.Context) { cancel() // We don't need to wait sink Close, pass a canceled context is ok if err := c.sink.close(canceledCtx); err != nil { - log.Warn("Closing sink failed in Owner", zap.String("changefeed", c.state.ID), zap.Error(err)) + log.Warn("Closing sink failed in Owner", zap.String("changefeed", c.state.ID.String()), zap.Error(err)) } c.wg.Wait() c.scheduler.Close(ctx) - changefeedCheckpointTsGauge.DeleteLabelValues(c.id) - changefeedCheckpointTsLagGauge.DeleteLabelValues(c.id) + changefeedCheckpointTsGauge.DeleteLabelValues(c.id.ID) + changefeedCheckpointTsLagGauge.DeleteLabelValues(c.id.ID) c.metricsChangefeedCheckpointTsGauge = nil c.metricsChangefeedCheckpointTsLagGauge = nil - changefeedResolvedTsGauge.DeleteLabelValues(c.id) - changefeedResolvedTsLagGauge.DeleteLabelValues(c.id) + changefeedResolvedTsGauge.DeleteLabelValues(c.id.ID) + changefeedResolvedTsLagGauge.DeleteLabelValues(c.id.ID) c.metricsChangefeedResolvedTsGauge = nil c.metricsChangefeedResolvedTsLagGauge = nil - changefeedTickDuration.DeleteLabelValues(c.id) + changefeedTickDuration.DeleteLabelValues(c.id.ID) c.metricsChangefeedTickDuration = nil - changefeedBarrierTsGauge.DeleteLabelValues(c.id) + changefeedBarrierTsGauge.DeleteLabelValues(c.id.ID) c.metricsChangefeedBarrierTsGauge = nil c.initialized = false @@ -412,14 +419,14 @@ func (c 
*changefeed) redoManagerCleanup(ctx context.Context) { redoManagerOpts := &redo.ManagerOptions{EnableBgRunner: false} redoManager, err := redo.NewManager(ctx, c.state.Info.Config.Consistent, redoManagerOpts) if err != nil { - log.Error("create redo manager failed", zap.String("changefeed", c.id), zap.Error(err)) + log.Error("create redo manager failed", zap.String("changefeed", c.id.String()), zap.Error(err)) return } c.redoManager = redoManager } err := c.redoManager.Cleanup(ctx) if err != nil { - log.Error("cleanup redo logs failed", zap.String("changefeed", c.id), zap.Error(err)) + log.Error("cleanup redo logs failed", zap.String("changefeed", c.id.String()), zap.Error(err)) } } } @@ -536,20 +543,20 @@ func (c *changefeed) handleBarrier(ctx cdcContext.Context) (uint64, error) { func (c *changefeed) asyncExecDDL(ctx cdcContext.Context, job *timodel.Job) (done bool, err error) { if job.BinlogInfo == nil { - log.Warn("ignore the invalid DDL job", zap.String("changefeed", c.id), + log.Warn("ignore the invalid DDL job", zap.String("changefeed", c.id.String()), zap.Reflect("job", job)) return true, nil } cyclicConfig := c.state.Info.Config.Cyclic if cyclicConfig.IsEnabled() && !cyclicConfig.SyncDDL { log.Info("ignore the DDL job because cyclic config is enabled and syncDDL is false", - zap.String("changefeed", c.id), zap.Reflect("job", job)) + zap.String("changefeed", c.id.String()), zap.Reflect("job", job)) return true, nil } if c.ddlEventCache == nil || c.ddlEventCache.CommitTs != job.BinlogInfo.FinishedTS { ddlEvent, err := c.schema.BuildDDLEvent(job) if err != nil { - log.Error("build DDL event fail", zap.String("changefeed", c.id), + log.Error("build DDL event fail", zap.String("changefeed", c.id.String()), zap.Reflect("job", job), zap.Error(err)) return false, errors.Trace(err) } @@ -563,7 +570,7 @@ func (c *changefeed) asyncExecDDL(ctx cdcContext.Context, job *timodel.Job) (don } ddlEvent.Query, err = addSpecialComment(ddlEvent.Query) if err != nil { - log.Error("add special comment fail", zap.String("changefeed", c.id), + log.Error("add special comment fail", zap.String("changefeed", c.id.String()), zap.String("Query", ddlEvent.Query), zap.Error(err)) return false, errors.Trace(err) } @@ -578,7 +585,7 @@ func (c *changefeed) asyncExecDDL(ctx cdcContext.Context, job *timodel.Job) (don } if job.BinlogInfo.TableInfo != nil && c.schema.IsIneligibleTableID(job.BinlogInfo.TableInfo.ID) { log.Warn("ignore the DDL job of ineligible table", - zap.String("changefeed", c.id), zap.Reflect("job", job)) + zap.String("changefeed", c.id.String()), zap.Reflect("job", job)) return true, nil } done, err = c.sink.emitDDLEvent(ctx, c.ddlEventCache) @@ -626,9 +633,11 @@ func (c *changefeed) Close(ctx cdcContext.Context) { startTime := time.Now() c.releaseResources(ctx) + upstream.UpStreamManager.Release(c.clusterID) + costTime := time.Since(startTime) if costTime > changefeedLogsWarnDuration { - log.Warn("changefeed close took too long", zap.String("changefeed", c.id), zap.Duration("duration", costTime)) + log.Warn("changefeed close took too long", zap.String("changefeed", c.id.String()), zap.Duration("duration", costTime)) } changefeedCloseDuration.Observe(costTime.Seconds()) } diff --git a/cdc/owner/ddl_puller.go b/cdc/owner/ddl_puller.go index 3b425b45081..40c25828eb9 100644 --- a/cdc/owner/ddl_puller.go +++ b/cdc/owner/ddl_puller.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" timodel "github.com/pingcap/tidb/parser/model" + 
"github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/entry" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/puller" @@ -29,6 +30,7 @@ import ( cdcContext "github.com/pingcap/tiflow/pkg/context" "github.com/pingcap/tiflow/pkg/filter" "github.com/pingcap/tiflow/pkg/regionspan" + "github.com/pingcap/tiflow/pkg/upstream" "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" "golang.org/x/sync/errgroup" @@ -65,24 +67,23 @@ type ddlPullerImpl struct { changefeedID string } -func newDDLPuller(ctx cdcContext.Context, startTs uint64) (DDLPuller, error) { - pdCli := ctx.GlobalVars().PDClient +func newDDLPuller(ctx cdcContext.Context, upStream *upstream.UpStream, startTs uint64) (DDLPuller, error) { f, err := filter.NewFilter(ctx.ChangefeedVars().Info.Config) if err != nil { return nil, errors.Trace(err) } var plr puller.Puller - kvStorage := ctx.GlobalVars().KVStorage + kvStorage := upStream.KVStorage // kvStorage can be nil only in the test if kvStorage != nil { plr = puller.NewPuller( - ctx, pdCli, - ctx.GlobalVars().GrpcPool, - ctx.GlobalVars().RegionCache, + ctx, upStream.PDClient, + upStream.GrpcPool, + upStream.RegionCache, kvStorage, - ctx.GlobalVars().PDClock, + upStream.PDClock, // Add "_ddl_puller" to make it different from table pullers. - ctx.ChangefeedVars().ID+"_ddl_puller", + ctx.ChangefeedVars().ID.String()+"_ddl_puller", startTs, []regionspan.Span{regionspan.GetDDLSpan(), regionspan.GetAddIndexDDLSpan()}, false) } @@ -93,7 +94,7 @@ func newDDLPuller(ctx cdcContext.Context, startTs uint64) (DDLPuller, error) { filter: f, cancel: func() {}, clock: clock.New(), - changefeedID: ctx.ChangefeedVars().ID + "_ddl_puller", + changefeedID: ctx.ChangefeedVars().ID.String() + "_ddl_puller", }, nil } @@ -102,9 +103,9 @@ func (h *ddlPullerImpl) Run(ctx cdcContext.Context) error { h.cancel = cancel log.Info("DDL puller started", zap.String("changefeed", h.changefeedID), zap.Uint64("resolvedTS", h.resolvedTS)) - stdCtx := util.PutTableInfoInCtx(ctx, -1, puller.DDLPullerTableName) - stdCtx = util.PutChangefeedIDInCtx(stdCtx, ctx.ChangefeedVars().ID) - stdCtx = util.PutRoleInCtx(stdCtx, util.RoleProcessor) + stdCtx := contextutil.PutTableInfoInCtx(ctx, -1, puller.DDLPullerTableName) + stdCtx = contextutil.PutChangefeedIDInCtx(stdCtx, ctx.ChangefeedVars().ID) + stdCtx = contextutil.PutRoleInCtx(stdCtx, util.RoleProcessor) g, stdCtx := errgroup.WithContext(stdCtx) lastResolvedTsAdvancedTime := h.clock.Now() diff --git a/cdc/owner/ddl_sink.go b/cdc/owner/ddl_sink.go index 1aa29d02c77..12a8334fcda 100644 --- a/cdc/owner/ddl_sink.go +++ b/cdc/owner/ddl_sink.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sink" "github.com/pingcap/tiflow/cdc/sink/mysql" @@ -97,8 +98,8 @@ func ddlSinkInitializer(ctx cdcContext.Context, a *ddlSinkImpl, id model.ChangeF return errors.Trace(err) } - stdCtx := util.PutChangefeedIDInCtx(ctx, id) - stdCtx = util.PutRoleInCtx(stdCtx, util.RoleOwner) + stdCtx := contextutil.PutChangefeedIDInCtx(ctx, id) + stdCtx = contextutil.PutRoleInCtx(stdCtx, util.RoleOwner) s, err := sink.New(stdCtx, id, info.SinkURI, filter, info.Config, info.Opts, a.errCh) if err != nil { return errors.Trace(err) @@ -131,13 +132,13 @@ func (s *ddlSinkImpl) run(ctx cdcContext.Context, id model.ChangeFeedID, info *m start := time.Now() if err 
:= s.sinkInitHandler(ctx, s, id, info); err != nil { log.Warn("ddl sink initialize failed", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Duration("duration", time.Since(start))) ctx.Throw(err) return } log.Info("ddl sink initialized, start processing...", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Duration("duration", time.Since(start))) // TODO make the tick duration configurable @@ -178,7 +179,7 @@ func (s *ddlSinkImpl) run(ctx cdcContext.Context, id model.ChangeFeedID, info *m } case ddl := <-s.ddlCh: log.Info("begin emit ddl event", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Any("DDL", ddl)) err := s.sink.EmitDDLEvent(ctx, ddl) failpoint.Inject("InjectChangefeedDDLError", func() { @@ -186,7 +187,7 @@ func (s *ddlSinkImpl) run(ctx cdcContext.Context, id model.ChangeFeedID, info *m }) if err == nil || cerror.ErrDDLEventIgnored.Equal(errors.Cause(err)) { log.Info("Execute DDL succeeded", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Bool("ignored", err != nil), zap.Any("ddl", ddl)) atomic.StoreUint64(&s.ddlFinishedTs, ddl.CommitTs) @@ -195,7 +196,7 @@ func (s *ddlSinkImpl) run(ctx cdcContext.Context, id model.ChangeFeedID, info *m // If DDL executing failed, and the error can not be ignored, // throw an error and pause the changefeed log.Error("Execute DDL failed", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Error(err), zap.Any("ddl", ddl)) ctx.Throw(errors.Trace(err)) @@ -217,13 +218,13 @@ func (s *ddlSinkImpl) emitDDLEvent(ctx cdcContext.Context, ddl *model.DDLEvent) if ddl.CommitTs <= ddlFinishedTs { // the DDL event is executed successfully, and done is true log.Info("ddl already executed", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Uint64("ddlFinishedTs", ddlFinishedTs), zap.Any("DDL", ddl)) return true, nil } if ddl.CommitTs <= s.ddlSentTs { log.Debug("ddl is not finished yet", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Uint64("ddlSentTs", s.ddlSentTs), zap.Any("DDL", ddl)) // the DDL event is executing and not finished yet, return false return false, nil @@ -234,11 +235,11 @@ func (s *ddlSinkImpl) emitDDLEvent(ctx cdcContext.Context, ddl *model.DDLEvent) case s.ddlCh <- ddl: s.ddlSentTs = ddl.CommitTs log.Info("ddl is sent", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Uint64("ddlSentTs", s.ddlSentTs)) default: log.Warn("ddl chan full, send it the next round", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.Uint64("ddlSentTs", s.ddlSentTs), zap.Uint64("ddlFinishedTs", s.ddlFinishedTs), zap.Any("DDL", ddl)) // if this hit, we think that ddlCh is full, diff --git a/cdc/owner/feed_state_manager.go b/cdc/owner/feed_state_manager.go index 3aa532723e9..57d134fe8d1 100644 --- a/cdc/owner/feed_state_manager.go +++ b/cdc/owner/feed_state_manager.go @@ -172,7 +172,7 @@ func (m *feedStateManager) PushAdminJob(job *model.AdminJob) { switch job.Type { case model.AdminStop, model.AdminResume, model.AdminRemove: default: - log.Panic("Can 
not handle this job", zap.String("changefeed", m.state.ID), + log.Panic("Can not handle this job", zap.String("changefeed", m.state.ID.String()), zap.String("changefeedState", string(m.state.Info.State)), zap.Any("job", job)) } m.pushAdminJob(job) @@ -183,13 +183,13 @@ func (m *feedStateManager) handleAdminJob() (jobsPending bool) { if job == nil || job.CfID != m.state.ID { return false } - log.Info("handle admin job", zap.String("changefeed", m.state.ID), zap.Reflect("job", job)) + log.Info("handle admin job", zap.String("changefeed", m.state.ID.String()), zap.Reflect("job", job)) switch job.Type { case model.AdminStop: switch m.state.Info.State { case model.StateNormal, model.StateError: default: - log.Warn("can not pause the changefeed in the current state", zap.String("changefeed", m.state.ID), + log.Warn("can not pause the changefeed in the current state", zap.String("changefeed", m.state.ID.String()), zap.String("changefeedState", string(m.state.Info.State)), zap.Any("job", job)) return } @@ -201,7 +201,7 @@ func (m *feedStateManager) handleAdminJob() (jobsPending bool) { case model.StateNormal, model.StateError, model.StateFailed, model.StateStopped, model.StateFinished, model.StateRemoved: default: - log.Warn("can not remove the changefeed in the current state", zap.String("changefeed", m.state.ID), + log.Warn("can not remove the changefeed in the current state", zap.String("changefeed", m.state.ID.String()), zap.String("changefeedState", string(m.state.Info.State)), zap.Any("job", job)) return } @@ -218,12 +218,12 @@ func (m *feedStateManager) handleAdminJob() (jobsPending bool) { return nil, true, nil }) checkpointTs := m.state.Info.GetCheckpointTs(m.state.Status) - log.Info("the changefeed is removed", zap.String("changefeed", m.state.ID), zap.Uint64("checkpointTs", checkpointTs)) + log.Info("the changefeed is removed", zap.String("changefeed", m.state.ID.String()), zap.Uint64("checkpointTs", checkpointTs)) case model.AdminResume: switch m.state.Info.State { case model.StateFailed, model.StateError, model.StateStopped, model.StateFinished: default: - log.Warn("can not resume the changefeed in the current state", zap.String("changefeed", m.state.ID), + log.Warn("can not resume the changefeed in the current state", zap.String("changefeed", m.state.ID.String()), zap.String("changefeedState", string(m.state.Info.State)), zap.Any("job", job)) return } @@ -248,7 +248,7 @@ func (m *feedStateManager) handleAdminJob() (jobsPending bool) { switch m.state.Info.State { case model.StateNormal: default: - log.Warn("can not finish the changefeed in the current state", zap.String("changefeed", m.state.ID), + log.Warn("can not finish the changefeed in the current state", zap.String("changefeed", m.state.ID.String()), zap.String("changefeedState", string(m.state.Info.State)), zap.Any("job", job)) return } @@ -256,7 +256,7 @@ func (m *feedStateManager) handleAdminJob() (jobsPending bool) { jobsPending = true m.patchState(model.StateFinished) default: - log.Warn("Unknown admin job", zap.Any("adminJob", job), zap.String("changefeed", m.state.ID)) + log.Warn("Unknown admin job", zap.Any("adminJob", job), zap.String("changefeed", m.state.ID.String())) } return } @@ -341,7 +341,7 @@ func (m *feedStateManager) errorsReportedByProcessors() []*model.RunningError { runningErrors = make(map[string]*model.RunningError) } runningErrors[position.Error.Code] = position.Error - log.Error("processor report an error", zap.String("changefeed", m.state.ID), zap.String("captureID", captureID), zap.Any("error", 
position.Error)) + log.Error("processor report an error", zap.String("changefeed", m.state.ID.String()), zap.String("captureID", captureID), zap.Any("error", position.Error)) m.state.PatchTaskPosition(captureID, func(position *model.TaskPosition) (*model.TaskPosition, bool, error) { if position == nil { return nil, false, nil @@ -426,7 +426,7 @@ func (m *feedStateManager) handleError(errs ...*model.RunningError) { m.shouldBeRunning = false m.patchState(model.StateFailed) } else { - log.Info("changefeed restart backoff interval is changed", zap.String("changefeed", m.state.ID), + log.Info("changefeed restart backoff interval is changed", zap.String("changefeed", m.state.ID.String()), zap.Duration("oldInterval", oldBackoffInterval), zap.Duration("newInterval", m.backoffInterval)) } } diff --git a/cdc/owner/owner.go b/cdc/owner/owner.go index 8cb8e0e2c5b..6295cebe90d 100644 --- a/cdc/owner/owner.go +++ b/cdc/owner/owner.go @@ -15,6 +15,7 @@ package owner import ( "context" + "fmt" "io" "math" "sync" @@ -29,7 +30,7 @@ import ( cdcContext "github.com/pingcap/tiflow/pkg/context" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/orchestrator" - "github.com/pingcap/tiflow/pkg/txnutil/gc" + "github.com/pingcap/tiflow/pkg/upstream" "github.com/pingcap/tiflow/pkg/version" pd "github.com/tikv/pd/client" "go.uber.org/zap" @@ -90,11 +91,11 @@ type Owner interface { } type ownerImpl struct { + // map from clusterID to the changefeeds that belong to that upstream + // changefeeds map[uint64]map[model.ChangeFeedID]*changefeed changefeeds map[model.ChangeFeedID]*changefeed captures map[model.CaptureID]*model.CaptureInfo - gcManager gc.Manager - ownerJobQueue struct { sync.Mutex queue []*ownerJob @@ -109,14 +110,14 @@ type ownerImpl struct { // as it is not a thread-safe value. bootstrapped bool - newChangefeed func(id model.ChangeFeedID, gcManager gc.Manager) *changefeed + newChangefeed func(id model.ChangeFeedID, upStream *upstream.UpStream) *changefeed } // NewOwner creates a new Owner -func NewOwner(pdClient pd.Client) Owner { +func NewOwner() Owner { return &ownerImpl{ - changefeeds: make(map[model.ChangeFeedID]*changefeed), - gcManager: gc.NewManager(pdClient), + changefeeds: make(map[model.ChangeFeedID]*changefeed), + // gcManager: gc.NewManager(pdClient), lastTickTime: time.Now(), newChangefeed: newChangefeed, logLimiter: rate.NewLimiter(versionInconsistentLogRate, versionInconsistentLogRate), @@ -125,15 +126,15 @@ func NewOwner(pdClient pd.Client) Owner { // NewOwner4Test creates a new Owner for test func NewOwner4Test( - newDDLPuller func(ctx cdcContext.Context, startTs uint64) (DDLPuller, error), + newDDLPuller func(ctx cdcContext.Context, upStream *upstream.UpStream, startTs uint64) (DDLPuller, error), newSink func() DDLSink, pdClient pd.Client, ) Owner { - o := NewOwner(pdClient).(*ownerImpl) + o := NewOwner().(*ownerImpl) // Most tests do not need to test bootstrap.
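As the owner.go hunks above and below show, the owner no longer builds a single `gc.Manager` from one PD client; instead it resolves a shared per-upstream handle through a package-level manager (`upstream.UpStreamManager.TryInit/Get/Release` in this diff). The following is a sketch of the manager API those call sites imply, building on the `UpStream` handle sketched earlier; the key type, the second `TryInit` parameter, and the reference counting are illustrative assumptions.

```go
// Sketch of the package-level upstream manager implied by the
// UpStreamManager.TryInit / Get / Release call sites in this diff.
package upstreamsketch

import (
	"fmt"
	"sync"
)

type Manager struct {
	mu        sync.Mutex
	upstreams map[string]*UpStream // keyed by an upstream/cluster identifier
	refCounts map[string]int
}

func NewManager() *Manager {
	return &Manager{
		upstreams: make(map[string]*UpStream),
		refCounts: make(map[string]int),
	}
}

// TryInit registers an upstream if it is not known yet and kicks off
// client initialization; calling it on every owner tick is cheap.
func (m *Manager) TryInit(id string, pdEndpoints []string) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	if _, ok := m.upstreams[id]; ok {
		return nil
	}
	// real code would start PD/TiKV clients here, likely asynchronously
	m.upstreams[id] = &UpStream{}
	return nil
}

// Get hands out the shared handle and bumps its reference count.
func (m *Manager) Get(id string) (*UpStream, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	up, ok := m.upstreams[id]
	if !ok {
		return nil, fmt.Errorf("upstream %q is not registered", id)
	}
	m.refCounts[id]++
	return up, nil
}

// Release drops one reference; the last release may close the clients.
func (m *Manager) Release(id string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.refCounts[id] > 0 {
		m.refCounts[id]--
	}
}
```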
o.bootstrapped = true - o.newChangefeed = func(id model.ChangeFeedID, gcManager gc.Manager) *changefeed { - return newChangefeed4Test(id, gcManager, newDDLPuller, newSink) + o.newChangefeed = func(id model.ChangeFeedID, upStream *upstream.UpStream) *changefeed { + return newChangefeed4Test(id, upStream, newDDLPuller, newSink) } return o } @@ -165,6 +166,13 @@ func (o *ownerImpl) Tick(stdCtx context.Context, rawState orchestrator.ReactorSt if !o.clusterVersionConsistent(state.Captures) { return state, nil } + + for key, info := range state.Upstreams { + if err := upstream.UpStreamManager.TryInit(key, info); err != nil { + return nil, errors.Trace(err) + } + } + // Owner should update GC safepoint before initializing changefeed, so // changefeed can remove its "ticdc-creating" service GC safepoint during // initializing. @@ -190,7 +198,12 @@ func (o *ownerImpl) Tick(stdCtx context.Context, rawState orchestrator.ReactorSt }) cfReactor, exist := o.changefeeds[changefeedID] if !exist { - cfReactor = o.newChangefeed(changefeedID, o.gcManager) + // the clusterID is needed to tell which upstream cluster the changefeed belongs to + upStream, err := upstream.UpStreamManager.Get(changefeedState.Info.UpstreamID) + if err != nil { + return state, errors.Trace(err) + } + cfReactor = o.newChangefeed(changefeedID, upStream) o.changefeeds[changefeedID] = cfReactor } cfReactor.Tick(ctx, changefeedState, state.Captures) @@ -220,6 +233,7 @@ func (o *ownerImpl) Tick(stdCtx context.Context, rawState orchestrator.ReactorSt } return state, cerror.ErrReactorFinished.GenWithStackByArgs() } + return state, nil } @@ -356,7 +370,7 @@ func (o *ownerImpl) updateMetrics(state *orchestrator.GlobalReactorState) { if conf.Debug != nil && conf.Debug.EnableNewScheduler { for cfID, cf := range o.changefeeds { if cf.state != nil && cf.state.Info != nil { - changefeedStatusGauge.WithLabelValues(cfID).Set(float64(cf.state.Info.State.ToInt())) + changefeedStatusGauge.WithLabelValues(cfID.String()).Set(float64(cf.state.Info.State.ToInt())) } // The InfoProvider is a proxy object returning information @@ -372,10 +386,10 @@ func (o *ownerImpl) updateMetrics(state *orchestrator.GlobalReactorState) { for captureID, info := range o.captures { ownerMaintainTableNumGauge. - WithLabelValues(cfID, info.AdvertiseAddr, maintainTableTypeTotal). + WithLabelValues(cfID.String(), info.AdvertiseAddr, maintainTableTypeTotal). Set(float64(totalCounts[captureID])) ownerMaintainTableNumGauge. - WithLabelValues(cfID, info.AdvertiseAddr, maintainTableTypeWip). + WithLabelValues(cfID.String(), info.AdvertiseAddr, maintainTableTypeWip). Set(float64(pendingCounts[captureID])) } } @@ -389,13 +403,13 @@ func (o *ownerImpl) updateMetrics(state *orchestrator.GlobalReactorState) { continue } ownerMaintainTableNumGauge. - WithLabelValues(changefeedID, captureInfo.AdvertiseAddr, maintainTableTypeTotal). + WithLabelValues(changefeedID.String(), captureInfo.AdvertiseAddr, maintainTableTypeTotal). Set(float64(len(taskStatus.Tables))) ownerMaintainTableNumGauge. - WithLabelValues(changefeedID, captureInfo.AdvertiseAddr, maintainTableTypeWip). + WithLabelValues(changefeedID.String(), captureInfo.AdvertiseAddr, maintainTableTypeWip).
Set(float64(len(taskStatus.Operation))) if changefeedState.Info != nil { - changefeedStatusGauge.WithLabelValues(changefeedID).Set(float64(changefeedState.Info.State.ToInt())) + changefeedStatusGauge.WithLabelValues(changefeedID.String()).Set(float64(changefeedState.Info.State.ToInt())) } } } @@ -567,11 +581,12 @@ func (o *ownerImpl) pushOwnerJob(job *ownerJob) { o.ownerJobQueue.queue = append(o.ownerJobQueue.queue, job) } +// The logic of this function needs to be reworked: the owner should iterate over the changefeeds of each upstream separately and then do the calculation func (o *ownerImpl) updateGCSafepoint( ctx context.Context, state *orchestrator.GlobalReactorState, ) error { - forceUpdate := false - minCheckpointTs := uint64(math.MaxUint64) + minCheckpointTsMap := make(map[string]uint64) + forceUpdateMap := make(map[string]bool) for changefeedID, changefeedState := range state.Changefeeds { if changefeedState.Info == nil { continue @@ -582,21 +597,42 @@ func (o *ownerImpl) updateGCSafepoint( continue } checkpointTs := changefeedState.Info.GetCheckpointTs(changefeedState.Status) + minCheckpointTs, ok := minCheckpointTsMap[changefeedState.Info.UpstreamID] + if !ok { + minCheckpointTs = uint64(math.MaxUint64) + } if minCheckpointTs > checkpointTs { minCheckpointTs = checkpointTs } + minCheckpointTsMap[changefeedState.Info.UpstreamID] = minCheckpointTs // Force update when adding a new changefeed. _, exist := o.changefeeds[changefeedID] if !exist { - forceUpdate = true + forceUpdateMap[changefeedState.Info.UpstreamID] = true + } + } + for id, minCheckpointTs := range minCheckpointTsMap { + // The logic here needs to be reworked: the safePoint must be updated per upstream + upStream, err := upstream.UpStreamManager.Get(id) + if err != nil { + return errors.Trace(err) } + if upStream == nil { + log.Panic("upStream is nil") + } + if !upStream.IsInitialized() { + log.Panic("upStream not initialized") + } + if upStream.GCManager == nil { + log.Panic("gcManager is nil") + } + // When the changefeed starts up, CDC will do a snapshot read at + // (checkpointTs - 1) from TiKV, so (checkpointTs - 1) should be an upper + // bound for the GC safepoint. + gcSafepointUpperBound := minCheckpointTs - 1 + err = upStream.GCManager.TryUpdateGCSafePoint(ctx, fmt.Sprintf("%s-%s", config.GetGlobalServerConfig().ClusterID, id), gcSafepointUpperBound, forceUpdateMap[id]) } - // When the changefeed starts up, CDC will do a snapshot read at - // (checkpointTs - 1) from TiKV, so (checkpointTs - 1) should be an upper - // bound for the GC safepoint.
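With several upstreams, a single global minimum checkpoint is no longer enough: the hunk above groups changefeeds by their `UpstreamID`, takes the minimum checkpoint per group, and advances that upstream's safepoint to `min-1` under a service ID derived from the CDC cluster ID and the upstream ID. The sketch below restates that grouping logic in isolation; the types and helper signature are simplified assumptions, not the real tiflow APIs.

```go
// Standalone sketch of the per-upstream GC safepoint calculation.
package ownersketch

import (
	"context"
	"fmt"
	"math"
)

type changefeedMeta struct {
	UpstreamID   string
	CheckpointTs uint64
	IsNew        bool // a newly added changefeed forces a safepoint update
}

type safePointUpdater interface {
	TryUpdateGCSafePoint(ctx context.Context, serviceID string, safePoint uint64, force bool) error
}

func updateGCSafepointPerUpstream(
	ctx context.Context,
	cdcClusterID string,
	feeds []changefeedMeta,
	updaters map[string]safePointUpdater, // one GC manager per upstream
) error {
	minCheckpoint := make(map[string]uint64)
	forceUpdate := make(map[string]bool)

	// Group checkpoints by upstream and keep the minimum of each group.
	for _, cf := range feeds {
		cur, ok := minCheckpoint[cf.UpstreamID]
		if !ok {
			cur = uint64(math.MaxUint64)
		}
		if cf.CheckpointTs < cur {
			cur = cf.CheckpointTs
		}
		minCheckpoint[cf.UpstreamID] = cur
		if cf.IsNew {
			forceUpdate[cf.UpstreamID] = true
		}
	}

	// Each upstream gets its own safepoint, keyed by "<cdc-cluster>-<upstream>",
	// set to min-1 because a changefeed snapshot-reads at (checkpointTs - 1).
	for id, ts := range minCheckpoint {
		updater, ok := updaters[id]
		if !ok {
			return fmt.Errorf("no GC manager registered for upstream %q", id)
		}
		if err := updater.TryUpdateGCSafePoint(ctx, fmt.Sprintf("%s-%s", cdcClusterID, id), ts-1, forceUpdate[id]); err != nil {
			return err
		}
	}
	return nil
}
```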
- gcSafepointUpperBound := minCheckpointTs - 1 - err := o.gcManager.TryUpdateGCSafePoint(ctx, gcSafepointUpperBound, forceUpdate) - return errors.Trace(err) + return nil } // StatusProvider returns a StatusProvider diff --git a/cdc/owner/scheduler.go b/cdc/owner/scheduler.go index 351da5fb861..2aa65732629 100644 --- a/cdc/owner/scheduler.go +++ b/cdc/owner/scheduler.go @@ -144,7 +144,7 @@ func (s *schedulerV2) DispatchTable( log.Info("schedulerV2: DispatchTable", zap.Any("message", message), zap.Any("successful", done), - zap.String("changefeedID", changeFeedID), + zap.String("changefeedID", changeFeedID.String()), zap.String("captureID", captureID)) }() @@ -182,7 +182,7 @@ func (s *schedulerV2) Announce( log.Info("schedulerV2: Announce", zap.Any("message", message), zap.Any("successful", done), - zap.String("changefeedID", changeFeedID), + zap.String("changefeedID", changeFeedID.String()), zap.String("captureID", captureID)) }() @@ -234,7 +234,7 @@ func (s *schedulerV2) trySendMessage( if cerror.ErrPeerMessageClientClosed.Equal(err) { log.Warn("peer messaging client is closed while trying to send a message through it. "+ "Report a bug if this warning repeats", - zap.String("changefeed", s.changeFeedID), + zap.String("changefeed", s.changeFeedID.String()), zap.String("target", target)) return false, nil } @@ -245,7 +245,7 @@ func (s *schedulerV2) trySendMessage( } func (s *schedulerV2) Close(ctx context.Context) { - log.Debug("scheduler closed", zap.String("changefeed", s.changeFeedID)) + log.Debug("scheduler closed", zap.String("changefeed", s.changeFeedID.String())) s.deregisterPeerMessageHandlers(ctx) } diff --git a/cdc/owner/scheduler_v1.go b/cdc/owner/scheduler_v1.go index 814f7804805..8c329419341 100644 --- a/cdc/owner/scheduler_v1.go +++ b/cdc/owner/scheduler_v1.go @@ -202,7 +202,7 @@ func (s *oldScheduler) dispatchToTargetCaptures(pendingJobs []*schedulerJob) { workloads[pendingJob.TargetCapture] -= 1 default: log.Panic("Unreachable, please report a bug", - zap.String("changefeed", s.state.ID), zap.Any("job", pendingJob)) + zap.String("changefeed", s.state.ID.String()), zap.Any("job", pendingJob)) } } @@ -309,7 +309,7 @@ func (s *oldScheduler) cleanUpFinishedOperations() { for captureID := range s.state.TaskStatuses { s.state.PatchTaskStatus(captureID, func(status *model.TaskStatus) (*model.TaskStatus, bool, error) { if status == nil { - log.Warn("task status of the capture is not found, may be the key in etcd was deleted", zap.String("captureID", captureID), zap.String("changefeed", s.state.ID)) + log.Warn("task status of the capture is not found, may be the key in etcd was deleted", zap.String("captureID", captureID), zap.String("changefeed", s.state.ID.String())) return status, false, nil } @@ -360,7 +360,7 @@ func (s *oldScheduler) rebalanceByTableNum() (shouldUpdateState bool) { shouldUpdateState = true log.Info("Start rebalancing", - zap.String("changefeed", s.state.ID), + zap.String("changefeed", s.state.ID.String()), zap.Int("tableNum", totalTableNum), zap.Int("captureNum", captureNum), zap.Int("targetLimit", upperLimitPerCapture)) @@ -392,7 +392,7 @@ func (s *oldScheduler) rebalanceByTableNum() (shouldUpdateState bool) { log.Info("Rebalance: Move table", zap.Int64("tableID", tableID), zap.String("capture", captureID), - zap.String("changefeed", s.state.ID)) + zap.String("changefeed", s.state.ID.String())) return status, true, nil }) tableNum2Remove-- diff --git a/cdc/owner/schema.go b/cdc/owner/schema.go index 0a81d229245..cfea4775b26 100644 --- a/cdc/owner/schema.go +++ 
b/cdc/owner/schema.go @@ -106,20 +106,20 @@ func (s *schemaWrap4Owner) AllTableNames() []model.TableName { func (s *schemaWrap4Owner) HandleDDL(job *timodel.Job) error { if job.BinlogInfo.FinishedTS <= s.ddlHandledTs { log.Warn("job finishTs is less than schema handleTs, discard invalid job", - zap.String("changefeed", s.id), zap.Stringer("job", job), + zap.String("changefeed", s.id.String()), zap.Stringer("job", job), zap.Any("ddlHandledTs", s.ddlHandledTs)) return nil } s.allPhysicalTablesCache = nil err := s.schemaSnapshot.HandleDDL(job) if err != nil { - log.Error("handle DDL failed", zap.String("changefeed", s.id), + log.Error("handle DDL failed", zap.String("changefeed", s.id.String()), zap.String("DDL", job.Query), zap.Stringer("job", job), zap.Error(err), zap.Any("role", util.RoleOwner)) return errors.Trace(err) } - log.Info("handle DDL", zap.String("changefeed", s.id), + log.Info("handle DDL", zap.String("changefeed", s.id.String()), zap.String("DDL", job.Query), zap.Stringer("job", job), zap.Any("role", util.RoleOwner)) @@ -162,7 +162,7 @@ func (s *schemaWrap4Owner) shouldIgnoreTable(t *model.TableInfo) bool { // See https://github.com/pingcap/tiflow/issues/4559 if !t.IsSequence() { log.Warn("skip ineligible table", zap.Int64("tableID", t.ID), - zap.Stringer("tableName", t.TableName), zap.String("changefeed", s.id)) + zap.Stringer("tableName", t.TableName), zap.String("changefeed", s.id.String())) } return true } diff --git a/cdc/processor/agent.go b/cdc/processor/agent.go index 4ad411590c6..d1a3e5eb2e6 100644 --- a/cdc/processor/agent.go +++ b/cdc/processor/agent.go @@ -104,7 +104,7 @@ func newAgent( flushInterval := time.Duration(conf.ProcessorFlushInterval) log.Debug("creating processor agent", - zap.String("changefeed", changeFeedID), + zap.String("changefeed", changeFeedID.String()), zap.Duration("sendCheckpointTsInterval", flushInterval)) ret.BaseAgent = scheduler.NewBaseAgent( @@ -116,7 +116,7 @@ func newAgent( // Note that registerPeerMessageHandlers sets handlerErrChs. if err := ret.registerPeerMessageHandlers(); err != nil { log.Warn("failed to register processor message handlers", - zap.String("changefeed", changeFeedID), + zap.String("changefeed", changeFeedID.String()), zap.Error(err)) return nil, errors.Trace(err) } @@ -124,7 +124,7 @@ func newAgent( if err != nil { if err1 := ret.deregisterPeerMessageHandlers(); err1 != nil { log.Warn("failed to unregister processor message handlers", - zap.String("changefeed", changeFeedID), + zap.String("changefeed", changeFeedID.String()), zap.Error(err)) } } @@ -133,7 +133,7 @@ func newAgent( etcdCliCtx, cancel := stdContext.WithTimeout(ctx, getOwnerFromEtcdTimeout) defer cancel() ownerCaptureID, err := ctx.GlobalVars().EtcdClient. - GetOwnerID(etcdCliCtx, etcd.CaptureOwnerKey) + GetOwnerID(etcdCliCtx, etcd.CaptureOwnerKey()) if err != nil { if err != concurrency.ErrElectionNoLeader { return nil, errors.Trace(err) @@ -142,14 +142,14 @@ func newAgent( // If we are registered in Etcd, an elected Owner will have to // contact us before it can schedule any table. log.Info("no owner found. We will wait for an owner to contact us.", - zap.String("changefeed", changeFeedID), + zap.String("changefeed", changeFeedID.String()), zap.Error(err)) return ret, nil } ret.ownerCaptureID = ownerCaptureID log.Debug("found owner", - zap.String("changefeed", changeFeedID), + zap.String("changefeed", changeFeedID.String()), zap.String("ownerID", ownerCaptureID)) ret.ownerRevision, err = ctx.GlobalVars().EtcdClient. 
@@ -158,7 +158,7 @@ func newAgent( if cerror.ErrOwnerNotFound.Equal(err) || cerror.ErrNotOwner.Equal(err) { // These are expected errors when no owner has been elected log.Info("no owner found when querying for the owner revision", - zap.String("changefeed", changeFeedID), + zap.String("changefeed", changeFeedID.String()), zap.Error(err)) ret.ownerCaptureID = "" return ret, nil @@ -196,7 +196,7 @@ func (a *agentImpl) FinishTableOperation( if !a.Barrier(ctx) { if _, exists := a.barrierSeqs[topic]; exists { log.L().Info("Delay sending FinishTableOperation due to pending sync", - zap.String("changefeedID", a.changeFeed), + zap.String("changefeedID", a.changeFeed.String()), zap.String("ownerID", a.ownerCaptureID), zap.Int64("tableID", tableID), zap.String("epoch", epoch)) @@ -211,7 +211,7 @@ func (a *agentImpl) FinishTableOperation( } log.Info("SchedulerAgent: FinishTableOperation", zap.Any("message", message), zap.Bool("successful", done), - zap.String("changefeedID", a.changeFeed), + zap.String("changefeedID", a.changeFeed.String()), zap.String("ownerID", a.ownerCaptureID)) }() @@ -251,13 +251,13 @@ func (a *agentImpl) SyncTaskStatuses( log.Debug("SchedulerAgent: SyncTaskStatuses", zap.Any("message", message), zap.Bool("successful", done), - zap.String("changefeedID", a.changeFeed), + zap.String("changefeedID", a.changeFeed.String()), zap.String("ownerID", a.ownerCaptureID)) return } log.Info("SchedulerAgent: SyncTaskStatuses", zap.Bool("successful", done), - zap.String("changefeedID", a.changeFeed), + zap.String("changefeedID", a.changeFeed.String()), zap.String("ownerID", a.ownerCaptureID)) }() @@ -291,7 +291,7 @@ func (a *agentImpl) SendCheckpoint( log.Debug("SchedulerAgent: SendCheckpoint", zap.Any("message", message), zap.Bool("successful", done), - zap.String("changefeedID", a.changeFeed), + zap.String("changefeedID", a.changeFeed.String()), zap.String("ownerID", a.ownerCaptureID)) }() @@ -317,7 +317,7 @@ func (a *agentImpl) Barrier(_ context.Context) (done bool) { sinceLastAdvanced := a.clock.Since(a.barrierLastCleared) if sinceLastAdvanced > barrierNotAdvancingWarnDuration && a.barrierLogRateLimiter.Allow() { log.Warn("processor send barrier not advancing, report a bug if this log repeats", - zap.String("changefeed", a.changeFeed), + zap.String("changefeed", a.changeFeed.String()), zap.String("ownerID", a.ownerCaptureID), zap.Duration("duration", sinceLastAdvanced)) } @@ -332,7 +332,7 @@ func (a *agentImpl) Barrier(_ context.Context) (done bool) { // We need to wait for the sync request anyways, and // there would not be any table to replicate for now. log.Debug("waiting for owner to request sync", - zap.String("changefeed", a.changeFeed)) + zap.String("changefeed", a.changeFeed.String())) return false } @@ -381,7 +381,7 @@ func (a *agentImpl) Close() error { log.Debug("processor messenger: closing", zap.Stack("stack")) if err := a.deregisterPeerMessageHandlers(); err != nil { log.Warn("failed to deregister processor message handlers", - zap.String("changefeed", a.changeFeed), + zap.String("changefeed", a.changeFeed.String()), zap.Error(err)) return errors.Trace(err) } @@ -411,7 +411,7 @@ func (a *agentImpl) trySendMessage( if cerror.ErrPeerMessageClientClosed.Equal(err) { log.Warn("peer messaging client is closed while trying to send a message through it. 
"+ "Report a bug if this warning repeats", - zap.String("changefeed", a.changeFeed), + zap.String("changefeed", a.changeFeed.String()), zap.String("target", target)) return false, nil } diff --git a/cdc/processor/manager.go b/cdc/processor/manager.go index 0b4214be793..790a177a5d3 100644 --- a/cdc/processor/manager.go +++ b/cdc/processor/manager.go @@ -27,6 +27,7 @@ import ( cdcContext "github.com/pingcap/tiflow/pkg/context" cerrors "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/orchestrator" + "github.com/pingcap/tiflow/pkg/upstream" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) @@ -49,10 +50,11 @@ type command struct { // Manager is a manager of processor, which maintains the state and behavior of processors type Manager struct { processors map[model.ChangeFeedID]*processor + upStreams map[model.ChangeFeedID]*upstream.UpStream commandQueue chan *command - newProcessor func(cdcContext.Context) *processor + newProcessor func(cdcContext.Context, *upstream.UpStream) *processor enableNewScheduler bool @@ -81,6 +83,11 @@ func (m *Manager) Tick(stdCtx context.Context, state orchestrator.ReactorState) return state, err } + for key, info := range globalState.Upstreams { + if err := upstream.UpStreamManager.TryInit(key, info); err != nil { + return nil, errors.Trace(err) + } + } captureID := ctx.GlobalVars().CaptureInfo.ID var inactiveChangefeedCount int for changefeedID, changefeedState := range globalState.Changefeeds { @@ -97,7 +104,11 @@ func (m *Manager) Tick(stdCtx context.Context, state orchestrator.ReactorState) if !exist { if m.enableNewScheduler { failpoint.Inject("processorManagerHandleNewChangefeedDelay", nil) - processor = m.newProcessor(ctx) + upStream, err := upstream.UpStreamManager.Get(changefeedState.Info.UpstreamID) + if err != nil { + return state, err + } + processor = m.newProcessor(ctx, upStream) m.processors[changefeedID] = processor } else { if changefeedState.Status.AdminJobType.IsStopState() || changefeedState.TaskStatuses[captureID].AdminJobType.IsStopState() { @@ -109,7 +120,11 @@ func (m *Manager) Tick(stdCtx context.Context, state orchestrator.ReactorState) continue } failpoint.Inject("processorManagerHandleNewChangefeedDelay", nil) - processor = m.newProcessor(ctx) + upStream, err := upstream.UpStreamManager.Get(changefeedState.Info.UpstreamID) + if err != nil { + return state, err + } + processor = m.newProcessor(ctx, upStream) m.processors[changefeedID] = processor } } @@ -139,13 +154,13 @@ func (m *Manager) closeProcessor(changefeedID model.ChangeFeedID) { err := processor.Close() costTime := time.Since(startTime) if costTime > processorLogsWarnDuration { - log.Warn("processor close took too long", zap.String("changefeed", changefeedID), + log.Warn("processor close took too long", zap.String("changefeed", changefeedID.String()), zap.String("capture", captureID), zap.Duration("duration", costTime)) } m.metricProcessorCloseDuration.Observe(costTime.Seconds()) if err != nil { log.Warn("failed to close processor", - zap.String("changefeed", changefeedID), + zap.String("changefeed", changefeedID.String()), zap.Error(err)) } delete(m.processors, changefeedID) diff --git a/cdc/processor/pipeline/actor_node_context.go b/cdc/processor/pipeline/actor_node_context.go index c6beb15ca51..3ae9fa497d6 100644 --- a/cdc/processor/pipeline/actor_node_context.go +++ b/cdc/processor/pipeline/actor_node_context.go @@ -99,7 +99,7 @@ func (c *actorNodeContext) SendToNextNode(msg pmessage.Message) { case <-c.Context.Done(): 
log.Info("context is canceled", zap.String("tableName", c.tableName), - zap.String("changefeed", c.changefeedVars.ID)) + zap.String("changefeed", c.changefeedVars.ID.String())) case c.outputCh <- msg: c.trySendTickMessage() } diff --git a/cdc/processor/pipeline/puller.go b/cdc/processor/pipeline/puller.go index 19bcdc6f9bc..218bdbdcef1 100644 --- a/cdc/processor/pipeline/puller.go +++ b/cdc/processor/pipeline/puller.go @@ -17,12 +17,14 @@ import ( "context" "github.com/pingcap/errors" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/puller" cdcContext "github.com/pingcap/tiflow/pkg/context" "github.com/pingcap/tiflow/pkg/pipeline" pmessage "github.com/pingcap/tiflow/pkg/pipeline/message" "github.com/pingcap/tiflow/pkg/regionspan" + "github.com/pingcap/tiflow/pkg/upstream" "github.com/pingcap/tiflow/pkg/util" "golang.org/x/sync/errgroup" ) @@ -62,25 +64,25 @@ func (n *pullerNode) tableSpan(ctx cdcContext.Context) []regionspan.Span { } func (n *pullerNode) Init(ctx pipeline.NodeContext) error { - return n.start(ctx, new(errgroup.Group), false, nil) + return n.start(ctx, nil, new(errgroup.Group), false, nil) } -func (n *pullerNode) start(ctx pipeline.NodeContext, wg *errgroup.Group, isActorMode bool, sorter *sorterNode) error { +func (n *pullerNode) start(ctx pipeline.NodeContext, upStream *upstream.UpStream, wg *errgroup.Group, isActorMode bool, sorter *sorterNode) error { n.wg = wg ctxC, cancel := context.WithCancel(ctx) - ctxC = util.PutTableInfoInCtx(ctxC, n.tableID, n.tableName) - ctxC = util.PutCaptureAddrInCtx(ctxC, ctx.GlobalVars().CaptureInfo.AdvertiseAddr) - ctxC = util.PutChangefeedIDInCtx(ctxC, ctx.ChangefeedVars().ID) - ctxC = util.PutRoleInCtx(ctxC, util.RoleProcessor) + ctxC = contextutil.PutTableInfoInCtx(ctxC, n.tableID, n.tableName) + ctxC = contextutil.PutCaptureAddrInCtx(ctxC, ctx.GlobalVars().CaptureInfo.AdvertiseAddr) + ctxC = contextutil.PutChangefeedIDInCtx(ctxC, ctx.ChangefeedVars().ID) + ctxC = contextutil.PutRoleInCtx(ctxC, util.RoleProcessor) // NOTICE: always pull the old value internally // See also: https://github.com/pingcap/tiflow/issues/2301. plr := puller.NewPuller( ctxC, - ctx.GlobalVars().PDClient, - ctx.GlobalVars().GrpcPool, - ctx.GlobalVars().RegionCache, - ctx.GlobalVars().KVStorage, - ctx.GlobalVars().PDClock, + upStream.PDClient, + upStream.GrpcPool, + upStream.RegionCache, + upStream.KVStorage, + upStream.PDClock, n.changefeed, n.replicaInfo.StartTs, n.tableSpan(ctx), true) n.wg.Go(func() error { diff --git a/cdc/processor/pipeline/sorter.go b/cdc/processor/pipeline/sorter.go index b117628eaa9..a694ad32be1 100644 --- a/cdc/processor/pipeline/sorter.go +++ b/cdc/processor/pipeline/sorter.go @@ -97,7 +97,7 @@ func createSorter(ctx pipeline.NodeContext, tableName string, tableID model.Tabl case model.SortUnified, model.SortInFile /* `file` becomes an alias of `unified` for backward compatibility */ : if sortEngine == model.SortInFile { log.Warn("File sorter is obsolete and replaced by unified sorter. Please revise your changefeed settings", - zap.String("changefeed", ctx.ChangefeedVars().ID), zap.String("tableName", tableName)) + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.String("tableName", tableName)) } if config.GetGlobalServerConfig().Debug.EnableDBSorter { @@ -154,7 +154,7 @@ func (n *sorterNode) start( lastSendResolvedTsTime := time.Now() // the time at which we last sent a resolved-ts. 
lastCRTs := uint64(0) // the commit-ts of the last row changed we sent. - metricsTableMemoryHistogram := tableMemoryHistogram.WithLabelValues(ctx.ChangefeedVars().ID) + metricsTableMemoryHistogram := tableMemoryHistogram.WithLabelValues(ctx.ChangefeedVars().ID.ID) metricsTicker := time.NewTicker(flushMemoryMetricsDuration) defer metricsTicker.Stop() @@ -315,7 +315,7 @@ func (n *sorterNode) releaseResource(changefeedID string) { func (n *sorterNode) Destroy(ctx pipeline.NodeContext) error { n.cancel() - n.releaseResource(ctx.ChangefeedVars().ID) + n.releaseResource(ctx.ChangefeedVars().ID.ID) return n.eg.Wait() } diff --git a/cdc/processor/pipeline/table.go b/cdc/processor/pipeline/table.go index b72fb3f14cb..fcc7873cc57 100644 --- a/cdc/processor/pipeline/table.go +++ b/cdc/processor/pipeline/table.go @@ -192,7 +192,7 @@ func NewTablePipeline(ctx cdcContext.Context, perTableMemoryQuota := serverConfig.GetGlobalServerConfig().PerTableMemoryQuota log.Debug("creating table flow controller", - zap.String("changefeed", ctx.ChangefeedVars().ID), + zap.String("changefeed", ctx.ChangefeedVars().ID.String()), zap.String("tableName", tableName), zap.Int64("tableID", tableID), zap.Uint64("quota", perTableMemoryQuota)) @@ -209,7 +209,7 @@ func NewTablePipeline(ctx cdcContext.Context, flowController, mounter, replConfig) sinkNode := newSinkNode(tableID, sink, replicaInfo.StartTs, targetTs, flowController) - p.AppendNode(ctx, "puller", newPullerNode(tableID, replicaInfo, tableName, changefeed)) + p.AppendNode(ctx, "puller", newPullerNode(tableID, replicaInfo, tableName, changefeed.String())) p.AppendNode(ctx, "sorter", sorterNode) if cyclicEnabled { p.AppendNode(ctx, "cyclic", newCyclicMarkNode(replicaInfo.MarkTableID)) diff --git a/cdc/processor/pipeline/table_actor.go b/cdc/processor/pipeline/table_actor.go index 66ca210fbb0..f09162757f4 100644 --- a/cdc/processor/pipeline/table_actor.go +++ b/cdc/processor/pipeline/table_actor.go @@ -32,6 +32,7 @@ import ( cdcContext "github.com/pingcap/tiflow/pkg/context" cerror "github.com/pingcap/tiflow/pkg/errors" pmessage "github.com/pingcap/tiflow/pkg/pipeline/message" + "github.com/pingcap/tiflow/pkg/upstream" "go.uber.org/zap" "golang.org/x/sync/errgroup" ) @@ -48,6 +49,9 @@ type tableActor struct { actorID actor.ID mb actor.Mailbox[pmessage.Message] router *actor.Router[pmessage.Message] + + upStream *upstream.UpStream + // all goroutines in tableActor should be spawned from this wg wg *errgroup.Group // backend mounter @@ -91,6 +95,7 @@ type tableActor struct { // NewTableActor creates a table actor and starts it. 
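The table-pipeline hunks that follow stop reaching into `ctx.GlobalVars()` for PD and TiKV clients: the table actor now carries the upstream handle and hands it to the puller node when it starts, so every per-table puller is built from the resources of the changefeed's own upstream. Below is a small sketch of that wiring under assumed names; the real `pullerNode.start` and `NewTableActor` signatures are richer.

```go
// Sketch of threading the upstream handle from the table actor down to its
// puller, mirroring pullerNode.start(ctx, upStream, ...) in this diff.
package pipelinesketch

import (
	"context"
	"fmt"
)

type UpStream struct {
	// PDClient, GrpcPool, RegionCache, KVStorage, PDClock ... omitted
}

type pullerNode struct {
	tableID int64
	started bool
}

// start builds the per-table puller from the upstream's clients instead of
// from process-global variables.
func (n *pullerNode) start(ctx context.Context, up *UpStream) error {
	if up == nil {
		return fmt.Errorf("table %d: no upstream handle", n.tableID)
	}
	// real code: puller.NewPuller(ctx, up.PDClient, up.GrpcPool, up.RegionCache,
	//                             up.KVStorage, up.PDClock, ...)
	n.started = true
	return nil
}

type tableActor struct {
	upStream *UpStream
	puller   *pullerNode
}

// startPuller shows why the actor owns the handle: a processor that serves
// several upstreams never mixes their clients.
func (t *tableActor) startPuller(ctx context.Context) error {
	return t.puller.start(ctx, t.upStream)
}
```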
func NewTableActor(cdcCtx cdcContext.Context, + upStream *upstream.UpStream, mounter entry.Mounter, tableID model.TableID, tableName string, @@ -122,6 +127,7 @@ func NewTableActor(cdcCtx cdcContext.Context, tableName: tableName, cyclicEnabled: cyclicEnabled, memoryQuota: serverConfig.GetGlobalServerConfig().PerTableMemoryQuota, + upStream: upStream, mounter: mounter, replicaInfo: replicaInfo, replicaConfig: config, @@ -129,7 +135,7 @@ func NewTableActor(cdcCtx cdcContext.Context, targetTs: targetTs, started: false, - changefeedID: changefeedVars.ID, + changefeedID: changefeedVars.ID.ID, changefeedVars: changefeedVars, globalVars: globalVars, router: globalVars.TableActorSystem.Router(), @@ -280,7 +286,7 @@ func (t *tableActor) start(sdtTableContext context.Context) error { return err } - pullerNode := newPullerNode(t.tableID, t.replicaInfo, t.tableName, t.changefeedVars.ID) + pullerNode := newPullerNode(t.tableID, t.replicaInfo, t.tableName, t.changefeedVars.ID.ID) pullerActorNodeContext := newContext(sdtTableContext, t.tableName, t.globalVars.TableActorSystem.Router(), @@ -497,7 +503,7 @@ func (t *tableActor) Wait() { // for ut var startPuller = func(t *tableActor, ctx *actorNodeContext) error { - return t.pullerNode.start(ctx, t.wg, true, t.sortNode) + return t.pullerNode.start(ctx, t.upStream, t.wg, true, t.sortNode) } var startSorter = func(t *tableActor, ctx *actorNodeContext) error { diff --git a/cdc/processor/processor.go b/cdc/processor/processor.go index 47f113f5b35..e1104ad72b9 100644 --- a/cdc/processor/processor.go +++ b/cdc/processor/processor.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/entry" "github.com/pingcap/tiflow/cdc/kv" "github.com/pingcap/tiflow/cdc/model" @@ -42,6 +43,7 @@ import ( "github.com/pingcap/tiflow/pkg/orchestrator" "github.com/pingcap/tiflow/pkg/regionspan" "github.com/pingcap/tiflow/pkg/retry" + "github.com/pingcap/tiflow/pkg/upstream" "github.com/pingcap/tiflow/pkg/util" "github.com/prometheus/client_golang/prometheus" "github.com/tikv/client-go/v2/oracle" @@ -54,10 +56,13 @@ const ( ) type processor struct { + clusterID uint64 changefeedID model.ChangeFeedID captureInfo *model.CaptureInfo changefeed *orchestrator.ChangefeedReactorState + upStream *upstream.UpStream + tables map[model.TableID]tablepipeline.TablePipeline schemaStorage entry.SchemaStorage @@ -225,10 +230,11 @@ func (p *processor) GetCheckpoint() (checkpointTs, resolvedTs model.Ts) { } // newProcessor creates a new processor -func newProcessor(ctx cdcContext.Context) *processor { +func newProcessor(ctx cdcContext.Context, upStream *upstream.UpStream) *processor { changefeedID := ctx.ChangefeedVars().ID conf := config.GetGlobalServerConfig() p := &processor{ + upStream: upStream, tables: make(map[model.TableID]tablepipeline.TablePipeline), errCh: make(chan error, 1), changefeedID: changefeedID, @@ -238,16 +244,16 @@ func newProcessor(ctx cdcContext.Context) *processor { newSchedulerEnabled: conf.Debug.EnableNewScheduler, - metricResolvedTsGauge: resolvedTsGauge.WithLabelValues(changefeedID), - metricResolvedTsLagGauge: resolvedTsLagGauge.WithLabelValues(changefeedID), - metricMinResolvedTableIDGuage: resolvedTsMinTableIDGauge.WithLabelValues(changefeedID), - metricCheckpointTsGauge: checkpointTsGauge.WithLabelValues(changefeedID), - metricCheckpointTsLagGauge: 
checkpointTsLagGauge.WithLabelValues(changefeedID), - metricMinCheckpointTableIDGuage: checkpointTsMinTableIDGauge.WithLabelValues(changefeedID), - metricSyncTableNumGauge: syncTableNumGauge.WithLabelValues(changefeedID), - metricProcessorErrorCounter: processorErrorCounter.WithLabelValues(changefeedID), - metricSchemaStorageGcTsGauge: processorSchemaStorageGcTsGauge.WithLabelValues(changefeedID), - metricProcessorTickDuration: processorTickDuration.WithLabelValues(changefeedID), + metricResolvedTsGauge: resolvedTsGauge.WithLabelValues(changefeedID.String()), + metricResolvedTsLagGauge: resolvedTsLagGauge.WithLabelValues(changefeedID.String()), + metricMinResolvedTableIDGuage: resolvedTsMinTableIDGauge.WithLabelValues(changefeedID.String()), + metricCheckpointTsGauge: checkpointTsGauge.WithLabelValues(changefeedID.String()), + metricCheckpointTsLagGauge: checkpointTsLagGauge.WithLabelValues(changefeedID.String()), + metricMinCheckpointTableIDGuage: checkpointTsMinTableIDGauge.WithLabelValues(changefeedID.String()), + metricSyncTableNumGauge: syncTableNumGauge.WithLabelValues(changefeedID.String()), + metricProcessorErrorCounter: processorErrorCounter.WithLabelValues(changefeedID.String()), + metricSchemaStorageGcTsGauge: processorSchemaStorageGcTsGauge.WithLabelValues(changefeedID.String()), + metricProcessorTickDuration: processorTickDuration.WithLabelValues(changefeedID.String()), } p.createTablePipeline = p.createTablePipelineImpl p.lazyInit = p.lazyInitImpl @@ -293,7 +299,7 @@ func (p *processor) Tick(ctx cdcContext.Context, state *orchestrator.ChangefeedR costTime := time.Since(startTime) if costTime > processorLogsWarnDuration { - log.Warn("processor tick took too long", zap.String("changefeed", p.changefeedID), + log.Warn("processor tick took too long", zap.String("changefeed", p.changefeedID.String()), zap.String("capture", ctx.GlobalVars().CaptureInfo.ID), zap.Duration("duration", costTime)) } p.metricProcessorTickDuration.Observe(costTime.Seconds()) @@ -359,7 +365,7 @@ func (p *processor) tick(ctx cdcContext.Context, state *orchestrator.ChangefeedR } // it is no need to check the error here, because we will use // local time when an error return, which is acceptable - pdTime, _ := ctx.GlobalVars().PDClock.CurrentTime() + pdTime, _ := p.upStream.PDClock.CurrentTime() p.handlePosition(oracle.GetPhysical(pdTime)) p.pushResolvedTs2Table() @@ -424,7 +430,7 @@ func (p *processor) lazyInitImpl(ctx cdcContext.Context) error { } ctx, cancel := cdcContext.WithCancel(ctx) p.cancel = cancel - + p.clusterID = p.upStream.PDClient.GetClusterID(ctx) // We don't close this error channel, since it is only safe to close channel // in sender, and this channel will be used in many modules including sink, // redo log manager, etc. Let runtime GC to recycle it. 
@@ -460,12 +466,12 @@ func (p *processor) lazyInitImpl(ctx cdcContext.Context) error { return errors.Trace(err) } - stdCtx := util.PutChangefeedIDInCtx(ctx, p.changefeed.ID) - stdCtx = util.PutRoleInCtx(stdCtx, util.RoleProcessor) + stdCtx := contextutil.PutChangefeedIDInCtx(ctx, p.changefeed.ID) + stdCtx = contextutil.PutRoleInCtx(stdCtx, util.RoleProcessor) p.mounter = entry.NewMounter(p.schemaStorage, - p.changefeedID, - util.TimezoneFromCtx(ctx), + p.changefeedID.String(), + contextutil.TimezoneFromCtx(ctx), p.changefeed.Info.Config.EnableOldValue) opts := make(map[string]string, len(p.changefeed.Info.Opts)+2) @@ -481,15 +487,15 @@ func (p *processor) lazyInitImpl(ctx cdcContext.Context) error { } opts[mark.OptCyclicConfig] = cyclicCfg } - opts[metrics.OptChangefeedID] = p.changefeed.ID + opts[metrics.OptChangefeedID] = p.changefeed.ID.ID opts[metrics.OptCaptureAddr] = ctx.GlobalVars().CaptureInfo.AdvertiseAddr - log.Info("processor try new sink", zap.String("changefeed", p.changefeed.ID)) + log.Info("processor try new sink", zap.String("changefeed", p.changefeed.ID.String())) start := time.Now() s, err := sink.New(stdCtx, p.changefeed.ID, p.changefeed.Info.SinkURI, p.filter, p.changefeed.Info.Config, opts, errCh) if err != nil { log.Info("processor new sink failed", - zap.String("changefeed", p.changefeed.ID), + zap.String("changefeed", p.changefeed.ID.String()), zap.Duration("duration", time.Since(start))) return errors.Trace(err) } @@ -670,20 +676,20 @@ func (p *processor) handleTableOperation(ctx cdcContext.Context) error { } func (p *processor) createAndDriveSchemaStorage(ctx cdcContext.Context) (entry.SchemaStorage, error) { - kvStorage := ctx.GlobalVars().KVStorage + kvStorage := p.upStream.KVStorage ddlspans := []regionspan.Span{regionspan.GetDDLSpan(), regionspan.GetAddIndexDDLSpan()} checkpointTs := p.changefeed.Info.GetCheckpointTs(p.changefeed.Status) - stdCtx := util.PutTableInfoInCtx(ctx, -1, puller.DDLPullerTableName) - stdCtx = util.PutChangefeedIDInCtx(stdCtx, ctx.ChangefeedVars().ID) - stdCtx = util.PutRoleInCtx(stdCtx, util.RoleProcessor) + stdCtx := contextutil.PutTableInfoInCtx(ctx, -1, puller.DDLPullerTableName) + stdCtx = contextutil.PutChangefeedIDInCtx(stdCtx, ctx.ChangefeedVars().ID) + stdCtx = contextutil.PutRoleInCtx(stdCtx, util.RoleProcessor) ddlPuller := puller.NewPuller( stdCtx, - ctx.GlobalVars().PDClient, - ctx.GlobalVars().GrpcPool, - ctx.GlobalVars().RegionCache, - ctx.GlobalVars().KVStorage, - ctx.GlobalVars().PDClock, - ctx.ChangefeedVars().ID, + p.upStream.PDClient, + p.upStream.GrpcPool, + p.upStream.RegionCache, + p.upStream.KVStorage, + p.upStream.PDClock, + ctx.ChangefeedVars().ID.String(), checkpointTs, ddlspans, false) meta, err := kv.GetSnapshotMeta(kvStorage, checkpointTs) if err != nil { @@ -845,7 +851,7 @@ func (p *processor) handlePosition(currentTs int64) { if position == nil { // when the captureInfo is deleted, the old owner will delete task status, task position, task workload in non-atomic // so processor may see a intermediate state, for example the task status is exist but task position is deleted. 
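Around these hunks the processor follows a simple lifecycle with its upstream handle: it keeps the handle it was created with, records the upstream cluster ID during lazy initialization, and gives the handle back through the manager in `Close`. A condensed sketch of that lifecycle follows; the names mirror the diff but the types are simplified stand-ins, not the real processor.

```go
// Condensed sketch of the processor's acquire/use/release lifecycle
// around the upstream handle.
package processorsketch

import "context"

type UpStream struct{ clusterID uint64 }

func (u *UpStream) GetClusterID(ctx context.Context) uint64 { return u.clusterID }

type manager struct{ refs map[uint64]int }

func (m *manager) Release(clusterID uint64) { m.refs[clusterID]-- }

type processor struct {
	upStream  *UpStream
	clusterID uint64
	mgr       *manager
}

// lazyInit runs on the first Tick: remember which upstream cluster we
// belong to so Close can release the right handle later.
func (p *processor) lazyInit(ctx context.Context) {
	p.clusterID = p.upStream.GetClusterID(ctx)
}

// Close returns the shared handle; the manager can shut the upstream's
// clients down once no changefeed references it any more.
func (p *processor) Close() error {
	p.mgr.Release(p.clusterID)
	return nil
}
```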
- log.Warn("task position is not exist, skip to update position", zap.String("changefeed", p.changefeed.ID)) + log.Warn("task position is not exist, skip to update position", zap.String("changefeed", p.changefeed.ID.String())) return nil, false, nil } position.CheckPointTs = minCheckpointTs @@ -1011,6 +1017,7 @@ func (p *processor) createTablePipelineImpl( var err error table, err = tablepipeline.NewTableActor( ctx, + p.upStream, p.mounter, tableID, tableName, @@ -1096,7 +1103,7 @@ func (p *processor) flushRedoLogMeta(ctx context.Context) error { } func (p *processor) Close() error { - log.Info("processor closing ...", zap.String("changefeed", p.changefeedID)) + log.Info("processor closing ...", zap.String("changefeed", p.changefeedID.String())) for _, tbl := range p.tables { tbl.Cancel() } @@ -1122,28 +1129,28 @@ func (p *processor) Close() error { ctx, cancel := context.WithCancel(context.Background()) cancel() log.Info("processor try to close the sinkManager", - zap.String("changefeed", p.changefeedID)) + zap.String("changefeed", p.changefeedID.String())) start := time.Now() if err := p.sinkManager.Close(ctx); err != nil { log.Info("processor close sinkManager failed", - zap.String("changefeed", p.changefeedID), + zap.String("changefeed", p.changefeedID.String()), zap.Duration("duration", time.Since(start))) return errors.Trace(err) } log.Info("processor close sinkManager success", - zap.String("changefeed", p.changefeedID), + zap.String("changefeed", p.changefeedID.String()), zap.Duration("duration", time.Since(start))) } - + upstream.UpStreamManager.Release(p.clusterID) // mark tables share the same cdcContext with its original table, don't need to cancel failpoint.Inject("processorStopDelay", nil) - resolvedTsGauge.DeleteLabelValues(p.changefeedID) - resolvedTsLagGauge.DeleteLabelValues(p.changefeedID) - checkpointTsGauge.DeleteLabelValues(p.changefeedID) - checkpointTsLagGauge.DeleteLabelValues(p.changefeedID) - syncTableNumGauge.DeleteLabelValues(p.changefeedID) - processorErrorCounter.DeleteLabelValues(p.changefeedID) - processorSchemaStorageGcTsGauge.DeleteLabelValues(p.changefeedID) + resolvedTsGauge.DeleteLabelValues(p.changefeedID.String()) + resolvedTsLagGauge.DeleteLabelValues(p.changefeedID.String()) + checkpointTsGauge.DeleteLabelValues(p.changefeedID.String()) + checkpointTsLagGauge.DeleteLabelValues(p.changefeedID.String()) + syncTableNumGauge.DeleteLabelValues(p.changefeedID.String()) + processorErrorCounter.DeleteLabelValues(p.changefeedID.String()) + processorSchemaStorageGcTsGauge.DeleteLabelValues(p.changefeedID.String()) return nil } diff --git a/cdc/puller/puller.go b/cdc/puller/puller.go index 2754e5f76e5..164b614294f 100644 --- a/cdc/puller/puller.go +++ b/cdc/puller/puller.go @@ -21,13 +21,13 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" tidbkv "github.com/pingcap/tidb/kv" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/kv" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/puller/frontier" "github.com/pingcap/tiflow/pkg/pdtime" "github.com/pingcap/tiflow/pkg/regionspan" "github.com/pingcap/tiflow/pkg/txnutil" - "github.com/pingcap/tiflow/pkg/util" "github.com/tikv/client-go/v2/oracle" "github.com/tikv/client-go/v2/tikv" pd "github.com/tikv/pd/client" @@ -113,7 +113,7 @@ func (p *pullerImpl) Run(ctx context.Context) error { eventCh := make(chan model.RegionFeedEvent, defaultPullerEventChanSize) lockResolver := txnutil.NewLockerResolver(p.kvStorage, - 
util.ChangefeedIDFromCtx(ctx), util.RoleFromCtx(ctx)) + contextutil.ChangefeedIDFromCtx(ctx), contextutil.RoleFromCtx(ctx)) for _, span := range p.spans { span := span @@ -123,22 +123,22 @@ func (p *pullerImpl) Run(ctx context.Context) error { }) } - changefeedID := util.ChangefeedIDFromCtx(ctx) - tableID, _ := util.TableIDFromCtx(ctx) - metricOutputChanSize := outputChanSizeHistogram.WithLabelValues(changefeedID) - metricEventChanSize := eventChanSizeHistogram.WithLabelValues(changefeedID) - metricPullerResolvedTs := pullerResolvedTsGauge.WithLabelValues(changefeedID) - metricTxnCollectCounterKv := txnCollectCounter.WithLabelValues(changefeedID, "kv") - metricTxnCollectCounterResolved := txnCollectCounter.WithLabelValues(changefeedID, "resolved") + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) + tableID, _ := contextutil.TableIDFromCtx(ctx) + metricOutputChanSize := outputChanSizeHistogram.WithLabelValues(changefeedID.ID) + metricEventChanSize := eventChanSizeHistogram.WithLabelValues(changefeedID.ID) + metricPullerResolvedTs := pullerResolvedTsGauge.WithLabelValues(changefeedID.ID) + metricTxnCollectCounterKv := txnCollectCounter.WithLabelValues(changefeedID.ID, "kv") + metricTxnCollectCounterResolved := txnCollectCounter.WithLabelValues(changefeedID.ID, "resolved") defer func() { - outputChanSizeHistogram.DeleteLabelValues(changefeedID) - eventChanSizeHistogram.DeleteLabelValues(changefeedID) - memBufferSizeGauge.DeleteLabelValues(changefeedID) - pullerResolvedTsGauge.DeleteLabelValues(changefeedID) - kvEventCounter.DeleteLabelValues(changefeedID, "kv") - kvEventCounter.DeleteLabelValues(changefeedID, "resolved") - txnCollectCounter.DeleteLabelValues(changefeedID, "kv") - txnCollectCounter.DeleteLabelValues(changefeedID, "resolved") + outputChanSizeHistogram.DeleteLabelValues(changefeedID.ID) + eventChanSizeHistogram.DeleteLabelValues(changefeedID.ID) + memBufferSizeGauge.DeleteLabelValues(changefeedID.ID) + pullerResolvedTsGauge.DeleteLabelValues(changefeedID.ID) + kvEventCounter.DeleteLabelValues(changefeedID.ID, "kv") + kvEventCounter.DeleteLabelValues(changefeedID.ID, "resolved") + txnCollectCounter.DeleteLabelValues(changefeedID.ID, "kv") + txnCollectCounter.DeleteLabelValues(changefeedID.ID, "resolved") }() lastResolvedTs := p.checkpointTs @@ -153,7 +153,7 @@ func (p *pullerImpl) Run(ctx context.Context) error { // resolved ts is not broken. if raw.CRTs < p.resolvedTs || (raw.CRTs == p.resolvedTs && raw.OpType != model.OpTypeResolved) { log.Warn("The CRTs is fallen back in puller", - zap.String("changefeed", changefeedID), + zap.String("changefeed", changefeedID.ID), zap.Reflect("row", raw), zap.Uint64("CRTs", raw.CRTs), zap.Uint64("resolvedTs", p.resolvedTs), @@ -195,7 +195,7 @@ func (p *pullerImpl) Run(ctx context.Context) error { metricTxnCollectCounterResolved.Inc() if !regionspan.IsSubSpan(e.Resolved.Span, p.spans...) 
{ log.Panic("the resolved span is not in the total span", - zap.String("changefeed", changefeedID), + zap.String("changefeed", changefeedID.ID), zap.Reflect("resolved", e.Resolved), zap.Int64("tableID", tableID), zap.Reflect("spans", p.spans), @@ -215,7 +215,7 @@ func (p *pullerImpl) Run(ctx context.Context) error { spans = append(spans, p.spans[i].String()) } log.Info("puller is initialized", - zap.String("changefeed", changefeedID), + zap.String("changefeed", changefeedID.ID), zap.Duration("duration", time.Since(start)), zap.Int64("tableID", tableID), zap.Strings("spans", spans), diff --git a/cdc/redo/manager.go b/cdc/redo/manager.go index 96b979dde41..656672f7108 100644 --- a/cdc/redo/manager.go +++ b/cdc/redo/manager.go @@ -24,11 +24,11 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/redo/writer" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" - "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" ) @@ -165,9 +165,9 @@ func NewManager(ctx context.Context, cfg *config.ConsistentConfig, opts *Manager m.writer = writer.NewBlackHoleWriter() case consistentStorageLocal, consistentStorageNFS, consistentStorageS3: globalConf := config.GetGlobalServerConfig() - changeFeedID := util.ChangefeedIDFromCtx(ctx) + changeFeedID := contextutil.ChangefeedIDFromCtx(ctx) // We use a temporary dir to storage redo logs before flushing to other backends, such as S3 - redoDir := filepath.Join(globalConf.DataDir, config.DefaultRedoDir, changeFeedID) + redoDir := filepath.Join(globalConf.DataDir, config.DefaultRedoDir, changeFeedID.Namespace, changeFeedID.ID) if m.storageType == consistentStorageLocal || m.storageType == consistentStorageNFS { // When using local or nfs as backend, store redo logs to redoDir directly. redoDir = uri.Path @@ -175,7 +175,7 @@ func NewManager(ctx context.Context, cfg *config.ConsistentConfig, opts *Manager writerCfg := &writer.LogWriterConfig{ Dir: redoDir, - CaptureID: util.CaptureAddrFromCtx(ctx), + CaptureID: contextutil.CaptureAddrFromCtx(ctx), ChangeFeedID: changeFeedID, CreateTime: time.Now(), MaxLogSize: cfg.MaxLogSize, diff --git a/cdc/redo/writer/file.go b/cdc/redo/writer/file.go index 2e263e357a7..c8330effaf9 100644 --- a/cdc/redo/writer/file.go +++ b/cdc/redo/writer/file.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/storage" + "github.com/pingcap/tiflow/cdc/model" "github.com/prometheus/client_golang/prometheus" "github.com/uber-go/atomic" pioutil "go.etcd.io/etcd/pkg/v3/ioutil" @@ -77,7 +78,7 @@ type flusher interface { // FileWriterConfig is the configuration used by a Writer. 
type FileWriterConfig struct { Dir string - ChangeFeedID string + ChangeFeedID model.ChangeFeedID CaptureID string FileType string CreateTime time.Time @@ -160,9 +161,9 @@ func NewWriter(ctx context.Context, cfg *FileWriterConfig, opts ...Option) (*Wri uint64buf: make([]byte, 8), storage: s3storage, - metricFsyncDuration: redoFsyncDurationHistogram.WithLabelValues(cfg.ChangeFeedID), - metricFlushAllDuration: redoFlushAllDurationHistogram.WithLabelValues(cfg.ChangeFeedID), - metricWriteBytes: redoWriteBytesGauge.WithLabelValues(cfg.ChangeFeedID), + metricFsyncDuration: redoFsyncDurationHistogram.WithLabelValues(cfg.ChangeFeedID.ID), + metricFlushAllDuration: redoFlushAllDurationHistogram.WithLabelValues(cfg.ChangeFeedID.ID), + metricWriteBytes: redoWriteBytesGauge.WithLabelValues(cfg.ChangeFeedID.ID), } w.running.Store(true) @@ -184,12 +185,12 @@ func (w *Writer) runFlushToDisk(ctx context.Context, flushIntervalInMs int64) { case <-ctx.Done(): err := w.Close() if err != nil { - log.Error("runFlushToDisk close fail", zap.String("changefeed", w.cfg.ChangeFeedID), zap.Error(err)) + log.Error("runFlushToDisk close fail", zap.String("changefeed", w.cfg.ChangeFeedID.ID), zap.Error(err)) } case <-ticker.C: err := w.Flush() if err != nil { - log.Error("redo log flush fail", zap.String("changefeed", w.cfg.ChangeFeedID), zap.Error(err)) + log.Error("redo log flush fail", zap.String("changefeed", w.cfg.ChangeFeedID.ID), zap.Error(err)) } } } @@ -272,9 +273,9 @@ func (w *Writer) Close() error { return nil } - redoFlushAllDurationHistogram.DeleteLabelValues(w.cfg.ChangeFeedID) - redoFsyncDurationHistogram.DeleteLabelValues(w.cfg.ChangeFeedID) - redoWriteBytesGauge.DeleteLabelValues(w.cfg.ChangeFeedID) + redoFlushAllDurationHistogram.DeleteLabelValues(w.cfg.ChangeFeedID.ID) + redoFsyncDurationHistogram.DeleteLabelValues(w.cfg.ChangeFeedID.ID) + redoWriteBytesGauge.DeleteLabelValues(w.cfg.ChangeFeedID.ID) return w.close() } diff --git a/cdc/redo/writer/writer.go b/cdc/redo/writer/writer.go index 97be4ea39fa..ff58f99881d 100644 --- a/cdc/redo/writer/writer.go +++ b/cdc/redo/writer/writer.go @@ -71,7 +71,7 @@ type RedoLogWriter interface { var defaultGCIntervalInMs = 5000 var ( - logWriters = map[string]*LogWriter{} + logWriters = map[model.ChangeFeedID]*LogWriter{} initLock sync.Mutex ) @@ -84,7 +84,7 @@ var redoLogPool = sync.Pool{ // LogWriterConfig is the configuration used by a Writer. type LogWriterConfig struct { Dir string - ChangeFeedID string + ChangeFeedID model.ChangeFeedID CaptureID string CreateTime time.Time // MaxLogSize is the maximum size of log in megabyte, defaults to defaultMaxLogSize. 
@@ -163,7 +163,7 @@ func NewLogWriter(ctx context.Context, cfg *LogWriterConfig) (*LogWriter, error) err = logWriter.initMeta(ctx) if err != nil { log.Warn("init redo meta fail", - zap.String("changefeed", cfg.ChangeFeedID), + zap.String("changefeed", cfg.ChangeFeedID.ID), zap.Error(err)) } if cfg.S3Storage { @@ -188,7 +188,7 @@ func NewLogWriter(ctx context.Context, cfg *LogWriterConfig) (*LogWriter, error) } } - logWriter.metricTotalRowsCount = redoTotalRowsCountGauge.WithLabelValues(cfg.ChangeFeedID) + logWriter.metricTotalRowsCount = redoTotalRowsCountGauge.WithLabelValues(cfg.ChangeFeedID.ID) logWriters[cfg.ChangeFeedID] = logWriter go logWriter.runGC(ctx) return logWriter, nil @@ -274,12 +274,12 @@ func (l *LogWriter) runGC(ctx context.Context) { case <-ctx.Done(): err := l.Close() if err != nil { - log.Error("runGC close fail", zap.String("changefeed", l.cfg.ChangeFeedID), zap.Error(err)) + log.Error("runGC close fail", zap.String("changefeed", l.cfg.ChangeFeedID.ID), zap.Error(err)) } case <-ticker.C: err := l.gc() if err != nil { - log.Error("redo log GC fail", zap.String("changefeed", l.cfg.ChangeFeedID), zap.Error(err)) + log.Error("redo log GC fail", zap.String("changefeed", l.cfg.ChangeFeedID.ID), zap.Error(err)) } } } @@ -546,7 +546,7 @@ var getAllFilesInS3 = func(ctx context.Context, l *LogWriter) ([]string, error) // Close implements RedoLogWriter.Close. func (l *LogWriter) Close() error { - redoTotalRowsCountGauge.DeleteLabelValues(l.cfg.ChangeFeedID) + redoTotalRowsCountGauge.DeleteLabelValues(l.cfg.ChangeFeedID.ID) var err error err = multierr.Append(err, l.rowWriter.Close()) diff --git a/cdc/scheduler/agent.go b/cdc/scheduler/agent.go index d1bb690017c..d7a237e7962 100644 --- a/cdc/scheduler/agent.go +++ b/cdc/scheduler/agent.go @@ -141,7 +141,7 @@ func NewBaseAgent( messenger ProcessorMessenger, config *BaseAgentConfig, ) *BaseAgent { - logger := log.L().With(zap.String("changefeed", changeFeedID)) + logger := log.L().With(zap.String("changefeed", changeFeedID.String())) ret := &BaseAgent{ pendingOps: deque.NewDeque(), tableOperations: map[model.TableID]*agentOperation{}, diff --git a/cdc/scheduler/schedule_dispatcher.go b/cdc/scheduler/schedule_dispatcher.go index 2b133d71eb8..d6c32b4892f 100644 --- a/cdc/scheduler/schedule_dispatcher.go +++ b/cdc/scheduler/schedule_dispatcher.go @@ -105,7 +105,7 @@ func NewBaseScheduleDispatcher( checkpointTs model.Ts, ) *BaseScheduleDispatcher { // logger is just the global logger with the `changefeed-id` field attached. 
- logger := log.L().With(zap.String("changefeed", changeFeedID)) + logger := log.L().With(zap.String("changefeed", changeFeedID.String())) return &BaseScheduleDispatcher{ tables: util.NewTableSet(),
diff --git a/cdc/server.go b/cdc/server.go index a4a6aaaceb0..3f64047e4ce 100644 --- a/cdc/server.go +++ b/cdc/server.go
@@ -21,7 +21,6 @@ import ( "net/http" "os" "path/filepath" - "strings" "time" "github.com/gin-gonic/gin"
@@ -48,7 +47,7 @@ import ( "github.com/pingcap/tiflow/pkg/httputil" "github.com/pingcap/tiflow/pkg/p2p" "github.com/pingcap/tiflow/pkg/tcpserver" - "github.com/pingcap/tiflow/pkg/util" + "github.com/pingcap/tiflow/pkg/upstream" "github.com/pingcap/tiflow/pkg/version" p2pProto "github.com/pingcap/tiflow/proto/p2p" )
@@ -118,6 +117,7 @@ func (s *Server) Run(ctx context.Context) error { return errors.Trace(err) } + // The pdClient here is only used for the version check; find a way to remove it later. pdClient, err := pd.NewClientWithContext( ctx, s.pdEndpoints, conf.Security.PDSecurityOption(), pd.WithGRPCDialOptions(
@@ -187,18 +187,19 @@ func (s *Server) Run(ctx context.Context) error { } kv.InitWorkerPool() - kvStore, err := kv.CreateTiStore(strings.Join(s.pdEndpoints, ","), conf.Security) - if err != nil { - return errors.Trace(err) - } - defer func() { - err := kvStore.Close() - if err != nil { - log.Warn("kv store close failed", zap.Error(err)) - } - }() - s.kvStorage = kvStore - ctx = util.PutKVStorageInCtx(ctx, kvStore) + + // kvStore, err := kv.CreateTiStore(strings.Join(s.pdEndpoints, ","), conf.Security) + // if err != nil { + // return errors.Trace(err) + // } + // defer func() { + // err := kvStore.Close() + // if err != nil { + // log.Warn("kv store close failed", zap.Error(err)) + // } + // }() + // s.kvStorage = kvStore + // ctx = contextutil.PutKVStorageInCtx(ctx, kvStore) s.capture = capture.NewCapture(s.pdClient, s.kvStorage, s.etcdClient, s.grpcService)
@@ -207,6 +208,9 @@ func (s *Server) Run(ctx context.Context) error { return err } + // global variable + upstream.UpStreamManager = upstream.NewManager(ctx) + return s.run(ctx) }
diff --git a/cdc/sink/buffer_sink.go b/cdc/sink/buffer_sink.go index c830bfa3588..b695536b5cb 100644 --- a/cdc/sink/buffer_sink.go +++ b/cdc/sink/buffer_sink.go
@@ -22,9 +22,9 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sink/metrics" - "github.com/pingcap/tiflow/pkg/util" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" )
@@ -144,7 +144,7 @@ func (b *bufferSink) runOnce(ctx context.Context, state *runState) (bool, error) log.Warn("flush row changed events too slow", zap.Int("batchSize", batchSize), zap.Duration("duration", elapsed), - util.ZapFieldChangefeed(ctx)) + contextutil.ZapFieldChangefeed(ctx)) } return true, nil
diff --git a/cdc/sink/metrics/statistics.go b/cdc/sink/metrics/statistics.go index ef7e91265cf..72b5046c44a 100644 --- a/cdc/sink/metrics/statistics.go +++ b/cdc/sink/metrics/statistics.go
@@ -19,8 +19,8 @@ import ( "time" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" - "github.com/pingcap/tiflow/pkg/util" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" )
@@ -58,27 +58,27 @@ func (t sinkType) String() string { func NewStatistics(ctx context.Context, t sinkType) *Statistics { statistics := &Statistics{ sinkType: t, - changefeedID: util.ChangefeedIDFromCtx(ctx), + changefeedID:
contextutil.ChangefeedIDFromCtx(ctx), lastPrintStatusTime: time.Now(), } s := t.String() - statistics.metricExecTxnHis = ExecTxnHistogram.WithLabelValues(statistics.changefeedID, s) - statistics.metricExecBatchHis = ExecBatchHistogram.WithLabelValues(statistics.changefeedID, s) - statistics.metricRowSizesHis = LargeRowSizeHistogram.WithLabelValues(statistics.changefeedID, s) - statistics.metricExecDDLHis = ExecDDLHistogram.WithLabelValues(statistics.changefeedID, s) - statistics.metricExecErrCnt = ExecutionErrorCounter.WithLabelValues(statistics.changefeedID) + statistics.metricExecTxnHis = ExecTxnHistogram.WithLabelValues(statistics.changefeedID.ID, s) + statistics.metricExecBatchHis = ExecBatchHistogram.WithLabelValues(statistics.changefeedID.ID, s) + statistics.metricRowSizesHis = LargeRowSizeHistogram.WithLabelValues(statistics.changefeedID.ID, s) + statistics.metricExecDDLHis = ExecDDLHistogram.WithLabelValues(statistics.changefeedID.ID, s) + statistics.metricExecErrCnt = ExecutionErrorCounter.WithLabelValues(statistics.changefeedID.ID) // Flush metrics in background for better accuracy and efficiency. changefeedID := statistics.changefeedID ticker := time.NewTicker(flushMetricsInterval) go func() { defer ticker.Stop() - metricTotalRows := TotalRowsCountGauge.WithLabelValues(changefeedID) - metricTotalFlushedRows := TotalFlushedRowsCountGauge.WithLabelValues(changefeedID) + metricTotalRows := TotalRowsCountGauge.WithLabelValues(changefeedID.ID) + metricTotalFlushedRows := TotalFlushedRowsCountGauge.WithLabelValues(changefeedID.ID) defer func() { - TotalRowsCountGauge.DeleteLabelValues(changefeedID) - TotalFlushedRowsCountGauge.DeleteLabelValues(changefeedID) + TotalRowsCountGauge.DeleteLabelValues(changefeedID.ID) + TotalFlushedRowsCountGauge.DeleteLabelValues(changefeedID.ID) }() for { select { @@ -97,7 +97,7 @@ func NewStatistics(ctx context.Context, t sinkType) *Statistics { // Statistics maintains some status and metrics of the Sink type Statistics struct { sinkType sinkType - changefeedID string + changefeedID model.ChangeFeedID totalRows uint64 totalFlushedRows uint64 totalDDLCount uint64 @@ -190,8 +190,8 @@ func (b *Statistics) PrintStatus(ctx context.Context) { log.Info("sink replication status", zap.Stringer("sinkType", b.sinkType), - zap.String("changefeed", b.changefeedID), - util.ZapFieldCapture(ctx), + zap.String("changefeed", b.changefeedID.ID), + contextutil.ZapFieldCapture(ctx), zap.Uint64("count", count), zap.Uint64("qps", qps), zap.Uint64("ddl", totalDDLCount)) diff --git a/cdc/sink/mq/mq.go b/cdc/sink/mq/mq.go index c4a7456eeda..7be66e29249 100644 --- a/cdc/sink/mq/mq.go +++ b/cdc/sink/mq/mq.go @@ -22,6 +22,7 @@ import ( "github.com/Shopify/sarama" "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sink/codec" "github.com/pingcap/tiflow/cdc/sink/metrics" @@ -90,8 +91,8 @@ func newMqSink( return nil, errors.Trace(err) } - changefeedID := util.ChangefeedIDFromCtx(ctx) - role := util.RoleFromCtx(ctx) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) + role := contextutil.RoleFromCtx(ctx) encoder := encoderBuilder.Build() statistics := metrics.NewStatistics(ctx, metrics.SinkTypeMQ) @@ -119,7 +120,7 @@ func newMqSink( case errCh <- err: default: log.Error("error channel is full", zap.Error(err), - zap.String("changefeed", changefeedID), zap.Any("role", s.role)) + zap.String("changefeed", changefeedID.ID), zap.Any("role", s.role)) 
} } }() @@ -148,7 +149,7 @@ func (k *mqSink) EmitRowChangedEvents(ctx context.Context, rows ...*model.RowCha if k.filter.ShouldIgnoreDMLEvent(row.StartTs, row.Table.Schema, row.Table.Table) { log.Info("Row changed event ignored", zap.Uint64("start-ts", row.StartTs), - zap.String("changefeed", k.id), + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) continue } @@ -274,7 +275,7 @@ func (k *mqSink) EmitDDLEvent(ctx context.Context, ddl *model.DDLEvent) error { zap.String("query", ddl.Query), zap.Uint64("startTs", ddl.StartTs), zap.Uint64("commitTs", ddl.CommitTs), - zap.String("changefeed", k.id), + zap.String("changefeed", k.id.ID), zap.Any("role", k.role), ) return cerror.ErrDDLEventIgnored.GenWithStackByArgs() @@ -294,7 +295,7 @@ func (k *mqSink) EmitDDLEvent(ctx context.Context, ddl *model.DDLEvent) error { k.statistics.AddDDLCount() log.Debug("emit ddl event", zap.Uint64("commitTs", ddl.CommitTs), zap.String("query", ddl.Query), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) if partitionRule == dispatcher.PartitionAll { partitionNum, err := k.topicManager.Partitions(topic) if err != nil { @@ -390,7 +391,7 @@ func NewKafkaSaramaSink(ctx context.Context, sinkURI *url.URL, return nil, cerror.WrapError(cerror.ErrKafkaInvalidConfig, err) } - encoderConfig := codec.NewConfig(protocol, util.TimezoneFromCtx(ctx)) + encoderConfig := codec.NewConfig(protocol, contextutil.TimezoneFromCtx(ctx)) if err := encoderConfig.Apply(sinkURI, opts); err != nil { return nil, cerror.WrapError(cerror.ErrKafkaInvalidConfig, err) } @@ -463,7 +464,7 @@ func NewPulsarSink(ctx context.Context, sinkURI *url.URL, filter *filter.Filter, return nil, cerror.WrapError(cerror.ErrKafkaInvalidConfig, err) } - encoderConfig := codec.NewConfig(protocol, util.TimezoneFromCtx(ctx)) + encoderConfig := codec.NewConfig(protocol, contextutil.TimezoneFromCtx(ctx)) if err := encoderConfig.Apply(sinkURI, opts); err != nil { return nil, errors.Trace(err) } diff --git a/cdc/sink/mq/producer/kafka/config.go b/cdc/sink/mq/producer/kafka/config.go index 85fc6e4e431..50a4893a635 100644 --- a/cdc/sink/mq/producer/kafka/config.go +++ b/cdc/sink/mq/producer/kafka/config.go @@ -23,10 +23,10 @@ import ( "github.com/Shopify/sarama" "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/security" - "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" ) @@ -298,13 +298,13 @@ func NewSaramaConfig(ctx context.Context, c *Config) (*sarama.Config, error) { return nil, cerror.WrapError(cerror.ErrKafkaInvalidVersion, err) } var role string - if util.IsOwnerFromCtx(ctx) { + if contextutil.IsOwnerFromCtx(ctx) { role = "owner" } else { role = "processor" } - captureAddr := util.CaptureAddrFromCtx(ctx) - changefeedID := util.ChangefeedIDFromCtx(ctx) + captureAddr := contextutil.CaptureAddrFromCtx(ctx) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) config.ClientID, err = kafkaClientID(role, captureAddr, changefeedID, c.ClientID) if err != nil { diff --git a/cdc/sink/mq/producer/kafka/config_test.go b/cdc/sink/mq/producer/kafka/config_test.go index fc9ddfcdfec..1046216344c 100644 --- a/cdc/sink/mq/producer/kafka/config_test.go +++ b/cdc/sink/mq/producer/kafka/config_test.go @@ -24,12 +24,12 @@ import ( "github.com/Shopify/sarama" "github.com/pingcap/errors" 
"github.com/pingcap/tidb/util/timeutil" + "github.com/pingcap/tiflow/cdc/ctx" "github.com/pingcap/tiflow/cdc/sink/codec" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/kafka" "github.com/pingcap/tiflow/pkg/security" - "github.com/pingcap/tiflow/pkg/util" "github.com/stretchr/testify/require" ) @@ -39,7 +39,7 @@ func TestNewSaramaConfig(t *testing.T) { config.Version = "invalid" _, err := NewSaramaConfig(ctx, config) require.Regexp(t, "invalid version.*", errors.Cause(err)) - ctx = util.SetOwnerInCtx(ctx) + ctx = ctx.SetOwnerInCtx(ctx) config.Version = "2.6.0" config.ClientID = "^invalid$" _, err = NewSaramaConfig(ctx, config) diff --git a/cdc/sink/mq/producer/kafka/kafka.go b/cdc/sink/mq/producer/kafka/kafka.go index bbb9fe952fd..04678534376 100644 --- a/cdc/sink/mq/producer/kafka/kafka.go +++ b/cdc/sink/mq/producer/kafka/kafka.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sink/codec" cerror "github.com/pingcap/tiflow/pkg/errors" @@ -97,7 +98,7 @@ func (k *kafkaSaramaProducer) AsyncSendMessage( failpoint.Inject("KafkaSinkAsyncSendError", func() { // simulate sending message to input channel successfully but flushing // message to Kafka meets error - log.Info("failpoint error injected", zap.String("changefeed", k.id), zap.Any("role", k.role)) + log.Info("failpoint error injected", zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) k.failpointCh <- errors.New("kafka sink injected error") failpoint.Return(nil) }) @@ -105,7 +106,7 @@ func (k *kafkaSaramaProducer) AsyncSendMessage( failpoint.Inject("SinkFlushDMLPanic", func() { time.Sleep(time.Second) log.Panic("SinkFlushDMLPanic", - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) }) msg := &sarama.ProducerMessage{ @@ -191,13 +192,13 @@ func (k *kafkaSaramaProducer) stop() { if atomic.SwapInt32(&k.closing, kafkaProducerClosing) == kafkaProducerClosing { return } - log.Info("kafka producer closing...", zap.String("changefeed", k.id), zap.Any("role", k.role)) + log.Info("kafka producer closing...", zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) close(k.closeCh) } // Close closes the sync and async clients. func (k *kafkaSaramaProducer) Close() error { - log.Info("stop the kafka producer", zap.String("changefeed", k.id), zap.Any("role", k.role)) + log.Info("stop the kafka producer", zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) k.stop() k.clientLock.Lock() @@ -207,7 +208,7 @@ func (k *kafkaSaramaProducer) Close() error { // We need to guard against double closing the clients, // which could lead to panic. 
log.Warn("kafka producer already released", - zap.String("changefeed", k.id), + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) return nil } @@ -223,10 +224,10 @@ func (k *kafkaSaramaProducer) Close() error { if err := k.client.Close(); err != nil { log.Error("close sarama client with error", zap.Error(err), zap.Duration("duration", time.Since(start)), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) } else { log.Info("sarama client closed", zap.Duration("duration", time.Since(start)), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) } start = time.Now() @@ -234,20 +235,20 @@ func (k *kafkaSaramaProducer) Close() error { if err != nil { log.Error("close async client with error", zap.Error(err), zap.Duration("duration", time.Since(start)), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) } else { log.Info("async client closed", zap.Duration("duration", time.Since(start)), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) } start = time.Now() err = k.syncProducer.Close() if err != nil { log.Error("close sync client with error", zap.Error(err), zap.Duration("duration", time.Since(start)), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) } else { log.Info("sync client closed", zap.Duration("duration", time.Since(start)), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) } // adminClient should be closed last, since `metricsMonitor` would use it when `Cleanup`. 
@@ -255,10 +256,10 @@ func (k *kafkaSaramaProducer) Close() error { if err := k.admin.Close(); err != nil { log.Warn("close kafka cluster admin with error", zap.Error(err), zap.Duration("duration", time.Since(start)), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) } else { log.Info("kafka cluster admin closed", zap.Duration("duration", time.Since(start)), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) } return nil
@@ -267,7 +268,7 @@ func (k *kafkaSaramaProducer) Close() error { func (k *kafkaSaramaProducer) run(ctx context.Context) error { defer func() { log.Info("stop the kafka producer", - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) k.stop() }()
@@ -280,7 +281,7 @@ func (k *kafkaSaramaProducer) run(ctx context.Context) error { return nil case err := <-k.failpointCh: log.Warn("receive from failpoint chan", zap.Error(err), - zap.String("changefeed", k.id), zap.Any("role", k.role)) + zap.String("changefeed", k.id.ID), zap.Any("role", k.role)) return err case ack = <-k.asyncProducer.Successes(): case err := <-k.asyncProducer.Errors():
@@ -316,10 +317,10 @@ func NewKafkaSaramaProducer( saramaConfig *sarama.Config, errCh chan error, ) (*kafkaSaramaProducer, error) { - changefeedID := util.ChangefeedIDFromCtx(ctx) - role := util.RoleFromCtx(ctx) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) + role := contextutil.RoleFromCtx(ctx) log.Info("Starting kafka sarama producer ...", zap.Any("config", config), - zap.String("changefeed", changefeedID), zap.Any("role", role)) + zap.String("changefeed", changefeedID.ID), zap.Any("role", role)) asyncProducer, err := sarama.NewAsyncProducerFromClient(client) if err != nil {
@@ -353,7 +354,7 @@ func NewKafkaSaramaProducer( case errCh <- err: default: log.Error("error channel is full", zap.Error(err), - zap.String("changefeed", k.id), zap.Any("role", role)) + zap.String("changefeed", k.id.ID), zap.Any("role", role)) } } }()
@@ -365,11 +366,11 @@ var ( commonInvalidChar = regexp.MustCompile(`[\?:,"]`) ) -func kafkaClientID(role, captureAddr, changefeedID, configuredClientID string) (clientID string, err error) { +func kafkaClientID(role, captureAddr string, changefeedID model.ChangeFeedID, configuredClientID string) (clientID string, err error) { if configuredClientID != "" { clientID = configuredClientID } else { - clientID = fmt.Sprintf("TiCDC_sarama_producer_%s_%s_%s", role, captureAddr, changefeedID) + clientID = fmt.Sprintf("TiCDC_sarama_producer_%s_%s_%s", role, captureAddr, changefeedID.String()) clientID = commonInvalidChar.ReplaceAllString(clientID, "_") } if !validClientID.MatchString(clientID) {
diff --git a/cdc/sink/mq/producer/kafka/kafka_test.go b/cdc/sink/mq/producer/kafka/kafka_test.go index 9629ccd915f..99713765aa9 100644 --- a/cdc/sink/mq/producer/kafka/kafka_test.go +++ b/cdc/sink/mq/producer/kafka/kafka_test.go
@@ -23,6 +23,7 @@ import ( "github.com/Shopify/sarama" "github.com/pingcap/check" "github.com/pingcap/errors" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/sink/codec" "github.com/pingcap/tiflow/pkg/kafka" "github.com/pingcap/tiflow/pkg/util"
@@ -96,7 +97,7 @@ func TestNewSaramaProducer(t *testing.T) { NewAdminClientImpl = kafka.NewSaramaAdminClient }() - ctx = util.PutRoleInCtx(ctx, util.RoleTester) + ctx = contextutil.PutRoleInCtx(ctx, util.RoleTester) saramaConfig,
err := NewSaramaConfig(ctx, config) require.Nil(t, err) saramaConfig.Producer.Flush.MaxMessages = 1
@@ -359,7 +360,7 @@ func TestProducerSendMessageFailed(t *testing.T) { }() errCh := make(chan error, 1) - ctx = util.PutRoleInCtx(ctx, util.RoleTester) + ctx = contextutil.PutRoleInCtx(ctx, util.RoleTester) saramaConfig, err := NewSaramaConfig(context.Background(), config) require.Nil(t, err) saramaConfig.Producer.Flush.MaxMessages = 1
@@ -443,7 +444,7 @@ func TestProducerDoubleClose(t *testing.T) { }() errCh := make(chan error, 1) - ctx = util.PutRoleInCtx(ctx, util.RoleTester) + ctx = contextutil.PutRoleInCtx(ctx, util.RoleTester) saramaConfig, err := NewSaramaConfig(context.Background(), config) require.Nil(t, err) client, err := sarama.NewClient(config.BrokerEndpoints, saramaConfig)
diff --git a/cdc/sink/mq/producer/kafka/metrics.go b/cdc/sink/mq/producer/kafka/metrics.go index d79d59e7996..e1a6142f193 100644 --- a/cdc/sink/mq/producer/kafka/metrics.go +++ b/cdc/sink/mq/producer/kafka/metrics.go
@@ -19,6 +19,7 @@ import ( "time" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/kafka" "github.com/pingcap/tiflow/pkg/util" "github.com/prometheus/client_golang/prometheus"
@@ -176,7 +177,7 @@ const ( ) type saramaMetricsMonitor struct { - changefeedID string + changefeedID model.ChangeFeedID role util.Role registry metrics.Registry
@@ -194,22 +195,22 @@ func (sm *saramaMetricsMonitor) collectMetrics() { func (sm *saramaMetricsMonitor) collectProducerMetrics() { batchSizeMetric := sm.registry.Get(batchSizeMetricName) if histogram, ok := batchSizeMetric.(metrics.Histogram); ok { - batchSizeGauge.WithLabelValues(sm.changefeedID).Set(histogram.Snapshot().Mean()) + batchSizeGauge.WithLabelValues(sm.changefeedID.ID).Set(histogram.Snapshot().Mean()) } recordSendRateMetric := sm.registry.Get(recordSendRateMetricName) if meter, ok := recordSendRateMetric.(metrics.Meter); ok { - recordSendRateGauge.WithLabelValues(sm.changefeedID).Set(meter.Snapshot().Rate1()) + recordSendRateGauge.WithLabelValues(sm.changefeedID.ID).Set(meter.Snapshot().Rate1()) } recordPerRequestMetric := sm.registry.Get(recordPerRequestMetricName) if histogram, ok := recordPerRequestMetric.(metrics.Histogram); ok { - recordPerRequestGauge.WithLabelValues(sm.changefeedID).Set(histogram.Snapshot().Mean()) + recordPerRequestGauge.WithLabelValues(sm.changefeedID.ID).Set(histogram.Snapshot().Mean()) } compressionRatioMetric := sm.registry.Get(compressionRatioMetricName) if histogram, ok := compressionRatioMetric.(metrics.Histogram); ok { - compressionRatioGauge.WithLabelValues(sm.changefeedID).Set(histogram.Snapshot().Mean()) + compressionRatioGauge.WithLabelValues(sm.changefeedID.ID).Set(histogram.Snapshot().Mean()) } }
@@ -223,7 +224,7 @@ func (sm *saramaMetricsMonitor) collectBrokers() { if err != nil { log.Warn("kafka cluster unreachable, "+ "use historical brokers to collect kafka broker level metrics", - zap.String("changefeed", sm.changefeedID), + zap.String("changefeed", sm.changefeedID.ID), zap.Any("role", sm.role), zap.Duration("duration", time.Since(start)), zap.Error(err))
@@ -243,42 +244,42 @@ func (sm *saramaMetricsMonitor) collectBrokerMetrics() { incomingByteRateMetric := sm.registry.Get(getBrokerMetricName(incomingByteRateMetricNamePrefix, brokerID)) if meter, ok := incomingByteRateMetric.(metrics.Meter); ok { - incomingByteRateGauge.WithLabelValues(sm.changefeedID, brokerID).Set(meter.Snapshot().Rate1()) + incomingByteRateGauge.WithLabelValues(sm.changefeedID.ID,
brokerID).Set(meter.Snapshot().Rate1()) } outgoingByteRateMetric := sm.registry.Get(getBrokerMetricName(outgoingByteRateMetricNamePrefix, brokerID)) if meter, ok := outgoingByteRateMetric.(metrics.Meter); ok { - outgoingByteRateGauge.WithLabelValues(sm.changefeedID, brokerID).Set(meter.Snapshot().Rate1()) + outgoingByteRateGauge.WithLabelValues(sm.changefeedID.ID, brokerID).Set(meter.Snapshot().Rate1()) } requestRateMetric := sm.registry.Get(getBrokerMetricName(requestRateMetricNamePrefix, brokerID)) if meter, ok := requestRateMetric.(metrics.Meter); ok { - requestRateGauge.WithLabelValues(sm.changefeedID, brokerID).Set(meter.Snapshot().Rate1()) + requestRateGauge.WithLabelValues(sm.changefeedID.ID, brokerID).Set(meter.Snapshot().Rate1()) } requestSizeMetric := sm.registry.Get(getBrokerMetricName(requestSizeMetricNamePrefix, brokerID)) if histogram, ok := requestSizeMetric.(metrics.Histogram); ok { - requestSizeGauge.WithLabelValues(sm.changefeedID, brokerID).Set(histogram.Snapshot().Mean()) + requestSizeGauge.WithLabelValues(sm.changefeedID.ID, brokerID).Set(histogram.Snapshot().Mean()) } requestLatencyMetric := sm.registry.Get(getBrokerMetricName(requestLatencyInMsMetricNamePrefix, brokerID)) if histogram, ok := requestLatencyMetric.(metrics.Histogram); ok { - requestLatencyInMsGauge.WithLabelValues(sm.changefeedID, brokerID).Set(histogram.Snapshot().Mean()) + requestLatencyInMsGauge.WithLabelValues(sm.changefeedID.ID, brokerID).Set(histogram.Snapshot().Mean()) } requestsInFlightMetric := sm.registry.Get(getBrokerMetricName(requestsInFlightMetricNamePrefix, brokerID)) if counter, ok := requestsInFlightMetric.(metrics.Counter); ok { - requestsInFlightGauge.WithLabelValues(sm.changefeedID, brokerID).Set(float64(counter.Snapshot().Count())) + requestsInFlightGauge.WithLabelValues(sm.changefeedID.ID, brokerID).Set(float64(counter.Snapshot().Count())) } responseRateMetric := sm.registry.Get(getBrokerMetricName(responseRateMetricNamePrefix, brokerID)) if meter, ok := responseRateMetric.(metrics.Meter); ok { - responseRateGauge.WithLabelValues(sm.changefeedID, brokerID).Set(meter.Snapshot().Rate1()) + responseRateGauge.WithLabelValues(sm.changefeedID.ID, brokerID).Set(meter.Snapshot().Rate1()) } responseSizeMetric := sm.registry.Get(getBrokerMetricName(responseSizeMetricNamePrefix, brokerID)) if histogram, ok := responseSizeMetric.(metrics.Histogram); ok { - responseSizeGauge.WithLabelValues(sm.changefeedID, brokerID).Set(histogram.Snapshot().Mean()) + responseSizeGauge.WithLabelValues(sm.changefeedID.ID, brokerID).Set(histogram.Snapshot().Mean()) } } } @@ -286,7 +287,7 @@ func (sm *saramaMetricsMonitor) collectBrokerMetrics() { // flushMetricsInterval specifies the interval of refresh sarama metrics. 
const flushMetricsInterval = 5 * time.Second -func runSaramaMetricsMonitor(ctx context.Context, registry metrics.Registry, changefeedID string, +func runSaramaMetricsMonitor(ctx context.Context, registry metrics.Registry, changefeedID model.ChangeFeedID, role util.Role, admin kafka.ClusterAdminClient, ) { monitor := &saramaMetricsMonitor{ @@ -321,22 +322,22 @@ func (sm *saramaMetricsMonitor) cleanup() { } func (sm *saramaMetricsMonitor) cleanUpProducerMetrics() { - batchSizeGauge.DeleteLabelValues(sm.changefeedID) - recordSendRateGauge.DeleteLabelValues(sm.changefeedID) - recordPerRequestGauge.DeleteLabelValues(sm.changefeedID) - compressionRatioGauge.DeleteLabelValues(sm.changefeedID) + batchSizeGauge.DeleteLabelValues(sm.changefeedID.ID) + recordSendRateGauge.DeleteLabelValues(sm.changefeedID.ID) + recordPerRequestGauge.DeleteLabelValues(sm.changefeedID.ID) + compressionRatioGauge.DeleteLabelValues(sm.changefeedID.ID) } func (sm *saramaMetricsMonitor) cleanUpBrokerMetrics() { for id := range sm.brokers { brokerID := strconv.Itoa(int(id)) - incomingByteRateGauge.DeleteLabelValues(sm.changefeedID, brokerID) - outgoingByteRateGauge.DeleteLabelValues(sm.changefeedID, brokerID) - requestRateGauge.DeleteLabelValues(sm.changefeedID, brokerID) - requestSizeGauge.DeleteLabelValues(sm.changefeedID, brokerID) - requestLatencyInMsGauge.DeleteLabelValues(sm.changefeedID, brokerID) - requestsInFlightGauge.DeleteLabelValues(sm.changefeedID, brokerID) - responseRateGauge.DeleteLabelValues(sm.changefeedID, brokerID) - responseSizeGauge.DeleteLabelValues(sm.changefeedID, brokerID) + incomingByteRateGauge.DeleteLabelValues(sm.changefeedID.ID, brokerID) + outgoingByteRateGauge.DeleteLabelValues(sm.changefeedID.ID, brokerID) + requestRateGauge.DeleteLabelValues(sm.changefeedID.ID, brokerID) + requestSizeGauge.DeleteLabelValues(sm.changefeedID.ID, brokerID) + requestLatencyInMsGauge.DeleteLabelValues(sm.changefeedID.ID, brokerID) + requestsInFlightGauge.DeleteLabelValues(sm.changefeedID.ID, brokerID) + responseRateGauge.DeleteLabelValues(sm.changefeedID.ID, brokerID) + responseSizeGauge.DeleteLabelValues(sm.changefeedID.ID, brokerID) } } diff --git a/cdc/sink/mysql/mysql.go b/cdc/sink/mysql/mysql.go index 55b7fec048f..e1243ec254c 100644 --- a/cdc/sink/mysql/mysql.go +++ b/cdc/sink/mysql/mysql.go @@ -88,7 +88,7 @@ func NewMySQLSink( replicaConfig *config.ReplicaConfig, opts map[string]string, ) (*mysqlSink, error) { - opts[metrics.OptChangefeedID] = changefeedID + opts[metrics.OptChangefeedID] = changefeedID.ID params, err := parseSinkURIToParams(ctx, sinkURI, opts) if err != nil { return nil, err diff --git a/cdc/sink/mysql/mysql_params.go b/cdc/sink/mysql/mysql_params.go index 0d3e365d997..8e503ab61ae 100644 --- a/cdc/sink/mysql/mysql_params.go +++ b/cdc/sink/mysql/mysql_params.go @@ -25,10 +25,10 @@ import ( dmysql "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/sink/metrics" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/security" - "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" ) @@ -219,7 +219,7 @@ func parseSinkURIToParams(ctx context.Context, sinkURI *url.URL, opts map[string params.timezone = fmt.Sprintf(`"%s"`, s) } } else { - tz := util.TimezoneFromCtx(ctx) + tz := contextutil.TimezoneFromCtx(ctx) params.timezone = fmt.Sprintf(`"%s"`, tz.String()) } diff --git a/cdc/sink/mysql/mysql_syncpoint_store.go 
b/cdc/sink/mysql/mysql_syncpoint_store.go index e5d7eaada2d..b581c5d21b3 100644 --- a/cdc/sink/mysql/mysql_syncpoint_store.go +++ b/cdc/sink/mysql/mysql_syncpoint_store.go @@ -23,10 +23,11 @@ import ( dmysql "github.com/go-sql-driver/mysql" "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" + "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/cyclic/mark" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/security" - "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" ) @@ -38,7 +39,7 @@ type mysqlSyncpointStore struct { } // newSyncpointStore create a sink to record the syncpoint map in downstream DB for every changefeed -func newMySQLSyncpointStore(ctx context.Context, id string, sinkURI *url.URL) (SyncpointStore, error) { +func newMySQLSyncpointStore(ctx context.Context, id model.ChangeFeedID, sinkURI *url.URL) (SyncpointStore, error) { var syncDB *sql.DB // todo If is neither mysql nor tidb, such as kafka, just ignore this feature. @@ -66,7 +67,7 @@ func newMySQLSyncpointStore(ctx context.Context, id string, sinkURI *url.URL) (S if err != nil { return nil, cerror.ErrMySQLConnectionError.Wrap(err).GenWithStack("fail to open MySQL connection") } - name := "cdc_mysql_tls" + "syncpoint" + id + name := "cdc_mysql_tls" + "syncpoint" + id.Namespace + "_" + id.ID err = dmysql.RegisterTLSConfig(name, tlsCfg) if err != nil { return nil, cerror.ErrMySQLConnectionError.Wrap(err).GenWithStack("fail to open MySQL connection") @@ -81,7 +82,7 @@ func newMySQLSyncpointStore(ctx context.Context, id string, sinkURI *url.URL) (S params.timezone = fmt.Sprintf(`"%s"`, s) } } else { - tz := util.TimezoneFromCtx(ctx) + tz := contextutil.TimezoneFromCtx(ctx) params.timezone = fmt.Sprintf(`"%s"`, tz.String()) } @@ -168,7 +169,7 @@ func (s *mysqlSyncpointStore) CreateSynctable(ctx context.Context) error { return cerror.WrapError(cerror.ErrMySQLTxnError, err) } -func (s *mysqlSyncpointStore) SinkSyncpoint(ctx context.Context, id string, checkpointTs uint64) error { +func (s *mysqlSyncpointStore) SinkSyncpoint(ctx context.Context, id model.ChangeFeedID, checkpointTs uint64) error { tx, err := s.db.BeginTx(ctx, nil) if err != nil { log.Error("sync table: begin Tx fail", zap.Error(err)) @@ -187,7 +188,7 @@ func (s *mysqlSyncpointStore) SinkSyncpoint(ctx context.Context, id string, chec } query := "insert ignore into " + mark.SchemaName + "." 
+ syncpointTableName + "(cf, primary_ts, secondary_ts) VALUES (?,?,?)" - _, err = tx.Exec(query, id, checkpointTs, secondaryTs) + _, err = tx.Exec(query, id.String(), checkpointTs, secondaryTs) if err != nil { err2 := tx.Rollback() if err2 != nil { diff --git a/cdc/sink/mysql/syncpointStore.go b/cdc/sink/mysql/syncpointStore.go index 1e9ed1d8d43..5fc2d88e460 100644 --- a/cdc/sink/mysql/syncpointStore.go +++ b/cdc/sink/mysql/syncpointStore.go @@ -28,7 +28,7 @@ type SyncpointStore interface { CreateSynctable(ctx context.Context) error // SinkSyncpoint record the syncpoint(a map with ts) in downstream db - SinkSyncpoint(ctx context.Context, id string, checkpointTs uint64) error + SinkSyncpoint(ctx context.Context, id model.ChangeFeedID, checkpointTs uint64) error // Close closes the SyncpointSink Close() error diff --git a/cdc/sink/sink.go b/cdc/sink/sink.go index 5613245a814..93b02de29b9 100644 --- a/cdc/sink/sink.go +++ b/cdc/sink/sink.go @@ -19,6 +19,7 @@ import ( "strings" "github.com/pingcap/failpoint" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sink/mq" "github.com/pingcap/tiflow/cdc/sink/mysql" @@ -164,9 +165,9 @@ func Validate(ctx context.Context, sinkURI string, cfg *config.ReplicaConfig, op return err } errCh := make(chan error) - ctx = util.PutRoleInCtx(ctx, util.RoleClient) + ctx = contextutil.PutRoleInCtx(ctx, util.RoleClient) // TODO: find a better way to verify a sinkURI is valid - s, err := New(ctx, "sink-verify", sinkURI, sinkFilter, cfg, opts, errCh) + s, err := New(ctx, model.ChangeFeedID{"default", "sink-verify"}, sinkURI, sinkFilter, cfg, opts, errCh) if err != nil { return err } diff --git a/cdc/sink/sink_manager.go b/cdc/sink/sink_manager.go index c56e6e70b2e..aa06e51e4c4 100644 --- a/cdc/sink/sink_manager.go +++ b/cdc/sink/sink_manager.go @@ -44,8 +44,8 @@ func NewManager( captureAddr string, changefeedID model.ChangeFeedID, ) *Manager { bufSink := newBufferSink(backendSink, checkpointTs) - go bufSink.run(ctx, changefeedID, errCh) - counter := metrics.TableSinkTotalRowsCountCounter.WithLabelValues(changefeedID) + go bufSink.run(ctx, changefeedID.ID, errCh) + counter := metrics.TableSinkTotalRowsCountCounter.WithLabelValues(changefeedID.ID) return &Manager{ bufSink: bufSink, tableSinks: make(map[model.TableID]*tableSink), @@ -82,11 +82,11 @@ func (m *Manager) CreateTableSink( func (m *Manager) Close(ctx context.Context) error { m.tableSinksMu.Lock() defer m.tableSinksMu.Unlock() - metrics.TableSinkTotalRowsCountCounter.DeleteLabelValues(m.changefeedID) + metrics.TableSinkTotalRowsCountCounter.DeleteLabelValues(m.changefeedID.ID) if m.bufSink != nil { if err := m.bufSink.Close(ctx); err != nil && errors.Cause(err) != context.Canceled { log.Warn("close bufSink failed", - zap.String("changefeed", m.changefeedID), + zap.String("changefeed", m.changefeedID.ID), zap.Error(err)) return err } diff --git a/cdc/sorter/leveldb/sorter.go b/cdc/sorter/leveldb/sorter.go index cbcaebe5c60..b40bd44f1da 100644 --- a/cdc/sorter/leveldb/sorter.go +++ b/cdc/sorter/leveldb/sorter.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sorter" "github.com/pingcap/tiflow/cdc/sorter/encoding" @@ -29,7 +30,6 @@ import ( actormsg "github.com/pingcap/tiflow/pkg/actor/message" "github.com/pingcap/tiflow/pkg/config" cerror 
"github.com/pingcap/tiflow/pkg/errors" - "github.com/pingcap/tiflow/pkg/util" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" ) @@ -94,11 +94,11 @@ func NewSorter( readerSystem *actor.System[message.Task], readerRouter *actor.Router[message.Task], compact *CompactScheduler, cfg *config.DBConfig, ) (*Sorter, error) { - changefeedID := util.ChangefeedIDFromCtx(ctx) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) metricIterDuration := sorterIterReadDurationHistogram.MustCurryWith( - prometheus.Labels{"id": changefeedID}) - metricTotalEventsKV := sorter.EventCount.WithLabelValues(changefeedID, "kv") - metricTotalEventsResolvedTs := sorter.EventCount.WithLabelValues(changefeedID, "resolved") + prometheus.Labels{"id": changefeedID.ID}) + metricTotalEventsKV := sorter.EventCount.WithLabelValues(changefeedID.ID, "kv") + metricTotalEventsResolvedTs := sorter.EventCount.WithLabelValues(changefeedID.ID, "resolved") // TODO: test capture the same table multiple times. uid := allocID() diff --git a/cdc/sorter/memory/entry_sorter.go b/cdc/sorter/memory/entry_sorter.go index 2561a9d8e23..469e57b649d 100644 --- a/cdc/sorter/memory/entry_sorter.go +++ b/cdc/sorter/memory/entry_sorter.go @@ -22,10 +22,10 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/notify" - "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" "golang.org/x/sync/errgroup" ) @@ -51,13 +51,13 @@ func NewEntrySorter() *EntrySorter { // Run runs EntrySorter func (es *EntrySorter) Run(ctx context.Context) error { - changefeedID := util.ChangefeedIDFromCtx(ctx) - _, tableName := util.TableIDFromCtx(ctx) - metricEntrySorterResolvedChanSizeGuage := entrySorterResolvedChanSizeGauge.WithLabelValues(changefeedID, tableName) - metricEntrySorterOutputChanSizeGauge := entrySorterOutputChanSizeGauge.WithLabelValues(changefeedID, tableName) - metricEntryUnsortedSizeGauge := entrySorterUnsortedSizeGauge.WithLabelValues(changefeedID, tableName) - metricEntrySorterSortDuration := entrySorterSortDuration.WithLabelValues(changefeedID, tableName) - metricEntrySorterMergeDuration := entrySorterMergeDuration.WithLabelValues(changefeedID, tableName) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) + _, tableName := contextutil.TableIDFromCtx(ctx) + metricEntrySorterResolvedChanSizeGuage := entrySorterResolvedChanSizeGauge.WithLabelValues(changefeedID.ID, tableName) + metricEntrySorterOutputChanSizeGauge := entrySorterOutputChanSizeGauge.WithLabelValues(changefeedID.ID, tableName) + metricEntryUnsortedSizeGauge := entrySorterUnsortedSizeGauge.WithLabelValues(changefeedID.ID, tableName) + metricEntrySorterSortDuration := entrySorterSortDuration.WithLabelValues(changefeedID.ID, tableName) + metricEntrySorterMergeDuration := entrySorterMergeDuration.WithLabelValues(changefeedID.ID, tableName) output := func(ctx context.Context, entry *model.PolymorphicEvent) { select { diff --git a/cdc/sorter/unified/backend_pool.go b/cdc/sorter/unified/backend_pool.go index d3ac6abcccb..23650e09378 100644 --- a/cdc/sorter/unified/backend_pool.go +++ b/cdc/sorter/unified/backend_pool.go @@ -28,12 +28,12 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/log" "github.com/pingcap/tidb/util/memory" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/sorter" sorterencoding 
"github.com/pingcap/tiflow/cdc/sorter/encoding" "github.com/pingcap/tiflow/pkg/config" cerrors "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/fsutil" - "github.com/pingcap/tiflow/pkg/util" "go.uber.org/zap" ) @@ -187,7 +187,7 @@ func (p *backEndPool) alloc(ctx context.Context) (backEnd, error) { } fname := fmt.Sprintf("%s%d.tmp", p.filePrefix, atomic.AddUint64(&p.fileNameCounter, 1)) - tableID, tableName := util.TableIDFromCtx(ctx) + tableID, tableName := contextutil.TableIDFromCtx(ctx) log.Debug("Unified Sorter: trying to create file backEnd", zap.String("filename", fname), zap.Int64("tableID", tableID), @@ -392,7 +392,7 @@ func checkDataDirSatisfied() error { if err != nil { return cerrors.WrapError(cerrors.ErrCheckDataDirSatisfied, err) } - if diskInfo.AvailPercentage < dataDirAvailLowThreshold { + if diskInfo.AvailPercentage > dataDirAvailLowThreshold { failpoint.Inject("InjectCheckDataDirSatisfied", func() { log.Info("inject check data dir satisfied error") failpoint.Return(nil) diff --git a/cdc/sorter/unified/heap_sorter.go b/cdc/sorter/unified/heap_sorter.go index 3edb4665b50..0edb673ab3e 100644 --- a/cdc/sorter/unified/heap_sorter.go +++ b/cdc/sorter/unified/heap_sorter.go @@ -23,10 +23,10 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/config" cerrors "github.com/pingcap/tiflow/pkg/errors" - "github.com/pingcap/tiflow/pkg/util" "github.com/pingcap/tiflow/pkg/workerpool" "go.uber.org/zap" ) @@ -95,7 +95,7 @@ func newHeapSorter(id int, out chan *flushTask) *heapSorter { // flush should only be called in the same goroutine where the heap is being written to. func (h *heapSorter) flush(ctx context.Context, maxResolvedTs uint64) error { - changefeedID := util.ChangefeedIDFromCtx(ctx) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) var ( backEnd backEnd @@ -108,7 +108,7 @@ func (h *heapSorter) flush(ctx context.Context, maxResolvedTs uint64) error { return nil } - sorterFlushCountHistogram.WithLabelValues(changefeedID).Observe(float64(h.heap.Len())) + sorterFlushCountHistogram.WithLabelValues(changefeedID.ID).Observe(float64(h.heap.Len())) // We check if the heap contains only one entry and that entry is a ResolvedEvent. // As an optimization, when the condition is true, we clear the heap and send an empty flush. 
@@ -167,7 +167,7 @@ func (h *heapSorter) flush(ctx context.Context, maxResolvedTs uint64) error { } } failpoint.Inject("sorterDebug", func() { - tableID, tableName := util.TableIDFromCtx(ctx) + tableID, tableName := contextutil.TableIDFromCtx(ctx) log.Debug("Unified Sorter new flushTask", zap.Int64("tableID", tableID), zap.String("tableName", tableName), @@ -253,7 +253,7 @@ func (h *heapSorter) flush(ctx context.Context, maxResolvedTs uint64) error { backEndFinal = nil failpoint.Inject("sorterDebug", func() { - tableID, tableName := util.TableIDFromCtx(ctx) + tableID, tableName := contextutil.TableIDFromCtx(ctx) log.Debug("Unified Sorter flushTask finished", zap.Int("heapID", task.heapSorterID), zap.Int64("tableID", tableID), diff --git a/cdc/sorter/unified/merger.go b/cdc/sorter/unified/merger.go index 672f173bf24..b8657ea8dfa 100644 --- a/cdc/sorter/unified/merger.go +++ b/cdc/sorter/unified/merger.go @@ -25,10 +25,10 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sorter" cerrors "github.com/pingcap/tiflow/pkg/errors" - "github.com/pingcap/tiflow/pkg/util" "github.com/tikv/client-go/v2/oracle" "go.uber.org/zap" "golang.org/x/sync/errgroup" @@ -36,14 +36,14 @@ import ( // TODO refactor this into a struct Merger. func runMerger(ctx context.Context, numSorters int, in <-chan *flushTask, out chan *model.PolymorphicEvent, onExit func()) error { - changefeedID := util.ChangefeedIDFromCtx(ctx) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) metricSorterEventCount := sorter.EventCount.MustCurryWith(map[string]string{ - "changefeed": changefeedID, + "changefeed": changefeedID.ID, }) - metricSorterResolvedTsGauge := sorter.ResolvedTsGauge.WithLabelValues(changefeedID) - metricSorterMergerStartTsGauge := sorterMergerStartTsGauge.WithLabelValues(changefeedID) - metricSorterMergeCountHistogram := sorterMergeCountHistogram.WithLabelValues(changefeedID) + metricSorterResolvedTsGauge := sorter.ResolvedTsGauge.WithLabelValues(changefeedID.ID) + metricSorterMergerStartTsGauge := sorterMergerStartTsGauge.WithLabelValues(changefeedID.ID) + metricSorterMergeCountHistogram := sorterMergeCountHistogram.WithLabelValues(changefeedID.ID) lastResolvedTs := make([]uint64, numSorters) minResolvedTs := uint64(0) @@ -254,7 +254,7 @@ func runMerger(ctx context.Context, numSorters int, in <-chan *flushTask, out ch failpoint.Inject("sorterDebug", func() { if sortHeap.Len() > 0 { - tableID, tableName := util.TableIDFromCtx(ctx) + tableID, tableName := contextutil.TableIDFromCtx(ctx) log.Debug("Unified Sorter: start merging", zap.Int64("tableID", tableID), zap.String("tableName", tableName), @@ -362,7 +362,7 @@ func runMerger(ctx context.Context, numSorters int, in <-chan *flushTask, out ch failpoint.Inject("sorterDebug", func() { if counter%10 == 0 { - tableID, tableName := util.TableIDFromCtx(ctx) + tableID, tableName := contextutil.TableIDFromCtx(ctx) log.Debug("Merging progress", zap.Int64("tableID", tableID), zap.String("tableName", tableName), @@ -382,7 +382,7 @@ func runMerger(ctx context.Context, numSorters int, in <-chan *flushTask, out ch failpoint.Inject("sorterDebug", func() { if counter > 0 { - tableID, tableName := util.TableIDFromCtx(ctx) + tableID, tableName := contextutil.TableIDFromCtx(ctx) log.Debug("Unified Sorter: merging ended", zap.Int64("tableID", tableID), zap.String("tableName", tableName), @@ -415,7 
+415,7 @@ func runMerger(ctx context.Context, numSorters int, in <-chan *flushTask, out ch } if task == nil { - tableID, tableName := util.TableIDFromCtx(ctx) + tableID, tableName := contextutil.TableIDFromCtx(ctx) log.Debug("Merger input channel closed, exiting", zap.Int64("tableID", tableID), zap.String("tableName", tableName))
diff --git a/cdc/sorter/unified/unified_sorter.go b/cdc/sorter/unified/unified_sorter.go index e998f457c1a..a52b33d6eb8 100644 --- a/cdc/sorter/unified/unified_sorter.go +++ b/cdc/sorter/unified/unified_sorter.go
@@ -20,6 +20,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors"
@@ -113,8 +114,8 @@ func (s *Sorter) Run(ctx context.Context) error { defer finish() ctx = context.WithValue(ctx, ctxKey{}, s) - ctx = util.PutChangefeedIDInCtx(ctx, s.metricsInfo.changeFeedID) - ctx = util.PutTableInfoInCtx(ctx, s.metricsInfo.tableID, s.metricsInfo.tableName) + ctx = contextutil.PutChangefeedIDInCtx(ctx, s.metricsInfo.changeFeedID) + ctx = contextutil.PutTableInfoInCtx(ctx, s.metricsInfo.tableID, s.metricsInfo.tableName) sorterConfig := config.GetGlobalServerConfig().Sorter numConcurrentHeaps := sorterConfig.NumConcurrentWorker
@@ -169,10 +170,10 @@ func (s *Sorter) Run(ctx context.Context) error { }) errg.Go(func() error { - changefeedID := util.ChangefeedIDFromCtx(ctx) + changefeedID := contextutil.ChangefeedIDFromCtx(ctx) metricSorterConsumeCount := sorterConsumeCount.MustCurryWith(map[string]string{ - "changefeed": changefeedID, + "changefeed": changefeedID.ID, }) nextSorterID := 0
diff --git a/cmd/kafka-consumer/main.go b/cmd/kafka-consumer/main.go index d251368e506..23292da29eb 100644 --- a/cmd/kafka-consumer/main.go +++ b/cmd/kafka-consumer/main.go
@@ -33,6 +33,7 @@ import ( "github.com/google/uuid" "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sink" "github.com/pingcap/tiflow/cdc/sink/codec"
@@ -380,7 +381,7 @@ func NewConsumer(ctx context.Context) (*Consumer, error) { if err != nil { return nil, errors.Annotate(err, "can not load timezone") } - ctx = util.PutTimezoneInCtx(ctx, tz) + ctx = contextutil.PutTimezoneInCtx(ctx, tz) filter, err := cdcfilter.NewFilter(config.GetDefaultReplicaConfig()) if err != nil { return nil, errors.Trace(err) }
@@ -412,7 +413,7 @@ func NewConsumer(ctx context.Context) (*Consumer, error) { c.sinks = make([]*partitionSink, kafkaPartitionNum) ctx, cancel := context.WithCancel(ctx) - ctx = util.PutRoleInCtx(ctx, util.RoleKafkaConsumer) + ctx = contextutil.PutRoleInCtx(ctx, util.RoleKafkaConsumer) errCh := make(chan error, 1) opts := map[string]string{} for i := 0; i < int(kafkaPartitionNum); i++ {
diff --git a/pkg/applier/redo.go b/pkg/applier/redo.go index 00fea1599e7..2aa5d234884 100644 --- a/pkg/applier/redo.go +++ b/pkg/applier/redo.go
@@ -19,6 +19,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/redo" "github.com/pingcap/tiflow/cdc/redo/reader"
@@ -113,8 +114,8 @@ func (ra *RedoApplier) consumeLogs(ctx context.Context) error { return err } opts := map[string]string{} - ctx = util.PutRoleInCtx(ctx, util.RoleRedoLogApplier) - s, err :=
sink.New(ctx, applierChangefeed, ra.cfg.SinkURI, ft, replicaConfig, opts, ra.errCh) + ctx = contextutil.PutRoleInCtx(ctx, util.RoleRedoLogApplier) + s, err := sink.New(ctx, model.ChangeFeedID{Namespace: "default", ID: applierChangefeed}, ra.cfg.SinkURI, ft, replicaConfig, opts, ra.errCh) if err != nil { return err } diff --git a/pkg/cmd/cli/cli_capture_list.go b/pkg/cmd/cli/cli_capture_list.go index 1f62a4491e1..9b77594a4bf 100644 --- a/pkg/cmd/cli/cli_capture_list.go +++ b/pkg/cmd/cli/cli_capture_list.go @@ -96,7 +96,7 @@ func listCaptures(ctx context.Context, etcdClient *etcd.CDCEtcdClient) ([]*captu return nil, err } - ownerID, err := etcdClient.GetOwnerID(ctx, etcd.CaptureOwnerKey) + ownerID, err := etcdClient.GetOwnerID(ctx, etcd.CaptureOwnerKey()) if err != nil && errors.Cause(err) != concurrency.ErrElectionNoLeader { return nil, err } diff --git a/pkg/cmd/cli/cli_changefeed_create.go b/pkg/cmd/cli/cli_changefeed_create.go index d5b91a1af22..5e8ad9cda74 100644 --- a/pkg/cmd/cli/cli_changefeed_create.go +++ b/pkg/cmd/cli/cli_changefeed_create.go @@ -15,6 +15,7 @@ package cli import ( "context" + "fmt" "net/url" "strings" "time" @@ -23,6 +24,7 @@ import ( "github.com/google/uuid" "github.com/pingcap/errors" "github.com/pingcap/log" + ctx2 "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/cdc/sink" cmdcontext "github.com/pingcap/tiflow/pkg/cmd/context" @@ -57,6 +59,8 @@ type changefeedCommonOptions struct { cyclicSyncDDL bool syncPointEnabled bool syncPointInterval time.Duration + clusterID string + namespace string } // newChangefeedCommonOptions creates new changefeed common options. @@ -83,6 +87,8 @@ func (o *changefeedCommonOptions) addFlags(cmd *cobra.Command) { cmd.PersistentFlags().BoolVar(&o.cyclicSyncDDL, "cyclic-sync-ddl", true, "(Experimental) Cyclic replication sync DDL of changefeed") cmd.PersistentFlags().BoolVar(&o.syncPointEnabled, "sync-point", false, "(Experimental) Set and Record syncpoint in replication(default off)") cmd.PersistentFlags().DurationVar(&o.syncPointInterval, "sync-interval", 10*time.Minute, "(Experimental) Set the interval for syncpoint in replication(default 10min)") + cmd.PersistentFlags().StringVar(&o.namespace, "namespace", "default", "changefeed namespace") + cmd.PersistentFlags().StringVar(&o.clusterID, "cluster-id", "default", "ticdc cluster id ") _ = cmd.PersistentFlags().MarkHidden("sort-dir") } @@ -363,7 +369,7 @@ func (o *createChangefeedOptions) validateStartTs(ctx context.Context) error { // Ensure the start ts is validate in the next 1 hour. const ensureTTL = 60 * 60. return gc.EnsureChangefeedStartTsSafety( - ctx, o.pdClient, o.changefeedID, ensureTTL, o.startTs) + ctx, o.pdClient, model.ChangeFeedID{o.commonChangefeedOptions.namespace, o.changefeedID}, ensureTTL, o.startTs) } // validateTargetTs checks if targetTs is a valid value. 
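Several hunks above replace a plain string changefeed ID with the two-field model.ChangeFeedID value, for example the EnsureChangefeedStartTsSafety call in validateStartTs. A minimal sketch of the identifier these call sites appear to assume, written with keyed fields since the positional literal above depends on field order:

package model

// Sketch only: the call sites in this diff imply that ChangeFeedID carries
// exactly a namespace and an ID.
type ChangeFeedID struct {
	Namespace string
	ID        string
}

// NewDefaultChangefeedID mirrors the helper referenced in
// cli_unsafe_resolve_lock.go further down and places the changefeed in the
// "default" namespace.
func NewDefaultChangefeedID(id string) ChangeFeedID {
	return ChangeFeedID{Namespace: "default", ID: id}
}

Spelling the literal as model.ChangeFeedID{Namespace: o.commonChangefeedOptions.namespace, ID: o.changefeedID} would keep go vet quiet and survive any future reordering of the struct fields.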
@@ -432,18 +438,29 @@ func (o *createChangefeedOptions) run(ctx context.Context, cmd *cobra.Command) e return errors.Annotate(err, "can not load timezone, Please specify the time zone through environment variable `TZ` or command line parameters `--tz`") } - ctx = ticdcutil.PutTimezoneInCtx(ctx, tz) + ctx = ctx2.PutTimezoneInCtx(ctx, tz) err = o.validateSink(ctx, info.Config, info.Opts) if err != nil { return err } + info.UpstreamID = fmt.Sprintf("%d", o.pdClient.GetClusterID(ctx)) infoStr, err := info.Marshal() if err != nil { return err } - err = o.etcdClient.CreateChangefeedInfo(ctx, info, id) + upstreamInfo := &model.UpstreamInfo{ + PD: o.pdAddr, + KeyPath: o.credential.KeyPath, + CAPath: o.credential.CAPath, + CertPath: o.credential.CertPath, + } + err = o.etcdClient.CreateChangefeedInfo(ctx, o.commonChangefeedOptions.clusterID, upstreamInfo, info, + model.ChangeFeedID{ + Namespace: o.commonChangefeedOptions.namespace, + ID: o.changefeedID, + }) if err != nil { return err } @@ -471,6 +488,7 @@ func newCmdCreateChangefeed(f factory.Factory) *cobra.Command { return err } + config.GetGlobalServerConfig().ClusterID = o.commonChangefeedOptions.clusterID err = o.validate(ctx, cmd) if err != nil { return err diff --git a/pkg/cmd/cli/cli_changefeed_helper.go b/pkg/cmd/cli/cli_changefeed_helper.go index c4278a8c4bf..af0da818f2f 100644 --- a/pkg/cmd/cli/cli_changefeed_helper.go +++ b/pkg/cmd/cli/cli_changefeed_helper.go @@ -108,7 +108,7 @@ func sendOwnerChangefeedQuery(ctx context.Context, etcdClient *etcd.CDCEtcdClien } resp, err := httpClient.PostForm(url, map[string][]string{ - api.OpVarChangefeedID: {id}, + api.OpVarChangefeedID: {id.ID}, }) if err != nil { return "", err @@ -151,7 +151,8 @@ func sendOwnerAdminChangeQuery(ctx context.Context, etcdClient *etcd.CDCEtcdClie resp, err := httpClient.PostForm(url, map[string][]string{ api.OpVarAdminJob: {fmt.Sprint(int(job.Type))}, - api.OpVarChangefeedID: {job.CfID}, + api.OpVarChangefeedID: {job.CfID.ID}, + "namespace": {job.CfID.Namespace}, api.OpForceRemoveChangefeed: {forceRemoveOpt}, }) if err != nil { diff --git a/pkg/cmd/cli/cli_changefeed_list.go b/pkg/cmd/cli/cli_changefeed_list.go index 179cac95213..790b294270d 100644 --- a/pkg/cmd/cli/cli_changefeed_list.go +++ b/pkg/cmd/cli/cli_changefeed_list.go @@ -14,9 +14,6 @@ package cli import ( - "encoding/json" - - "github.com/pingcap/log" "github.com/pingcap/tiflow/cdc/api" "github.com/pingcap/tiflow/pkg/cmd/context" "github.com/pingcap/tiflow/pkg/cmd/factory" @@ -24,7 +21,6 @@ import ( "github.com/pingcap/tiflow/pkg/etcd" "github.com/pingcap/tiflow/pkg/security" "github.com/spf13/cobra" - "go.uber.org/zap" ) // changefeedCommonInfo holds some common used information of a changefeed. 
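The new CreateChangefeedInfo call above passes the TiCDC cluster ID, the upstream PD and TLS information, and the namespaced changefeed ID together. Read alongside the pkg/etcd changes later in this diff, a changefeed cf1 created with --cluster-id=prod --namespace=ns1 would land under keys roughly like the ones printed below; the concrete IDs are invented for illustration, and the layout is only what the EtcdKeyBase and NamespacedPrefix helpers imply:

package main

import "fmt"

func main() {
	// Example values only; in the real flow they come from the CLI flags and PD.
	clusterID, namespace, changefeed := "prod", "ns1", "cf1"
	upstreamID := "6812345678901234567" // info.UpstreamID, i.e. the PD cluster ID

	// Changefeed config, changefeed status, and the shared upstream record.
	fmt.Println("/tidb/cdc/" + clusterID + "/" + namespace + "/changefeed/info/" + changefeed)
	fmt.Println("/tidb/cdc/" + clusterID + "/" + namespace + "/job/" + changefeed)
	fmt.Println("/tidb/cdc/" + clusterID + "/__cdc_meta__/upstream/" + upstreamID)
}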
@@ -93,24 +89,24 @@ func (o *listChangefeedOptions) run(cmd *cobra.Command) error { cfs := make([]*changefeedCommonInfo, 0, len(changefeedIDs)) - for id := range changefeedIDs { - cfci := &changefeedCommonInfo{ID: id} - - resp, err := sendOwnerChangefeedQuery(ctx, o.etcdClient, id, o.credential) - if err != nil { - // if no capture is available, the query will fail, just add a warning here - log.Warn("query changefeed info failed", zap.String("error", err.Error())) - } else { - info := &api.ChangefeedResp{} - err = json.Unmarshal([]byte(resp), info) - if err != nil { - return err - } - - cfci.Summary = info - } - cfs = append(cfs, cfci) - } + //for id := range changefeedIDs { + // cfci := &changefeedCommonInfo{ID: id} + // + // resp, err := sendOwnerChangefeedQuery(ctx, o.etcdClient, id, o.credential) + // if err != nil { + // // if no capture is available, the query will fail, just add a warning here + // log.Warn("query changefeed info failed", zap.String("error", err.Error())) + // } else { + // info := &api.ChangefeedResp{} + // err = json.Unmarshal([]byte(resp), info) + // if err != nil { + // return err + // } + // + // cfci.Summary = info + // } + // cfs = append(cfs, cfci) + //} return util.JSONPrint(cmd, cfs) } diff --git a/pkg/cmd/cli/cli_changefeed_pause.go b/pkg/cmd/cli/cli_changefeed_pause.go index 99cb9ff4e0c..2f4aae1a22d 100644 --- a/pkg/cmd/cli/cli_changefeed_pause.go +++ b/pkg/cmd/cli/cli_changefeed_pause.go @@ -60,7 +60,7 @@ func (o *pauseChangefeedOptions) complete(f factory.Factory) error { // run the `cli changefeed pause` command. func (o *pauseChangefeedOptions) run() error { job := model.AdminJob{ - CfID: o.changefeedID, + //CfID: o.changefeedID, Type: model.AdminStop, } diff --git a/pkg/cmd/cli/cli_changefeed_query.go b/pkg/cmd/cli/cli_changefeed_query.go index aae9b471709..e7998f6f267 100644 --- a/pkg/cmd/cli/cli_changefeed_query.go +++ b/pkg/cmd/cli/cli_changefeed_query.go @@ -14,16 +14,12 @@ package cli import ( - "github.com/pingcap/log" "github.com/pingcap/tiflow/cdc/model" - "github.com/pingcap/tiflow/pkg/cmd/context" "github.com/pingcap/tiflow/pkg/cmd/factory" "github.com/pingcap/tiflow/pkg/cmd/util" - cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/etcd" "github.com/pingcap/tiflow/pkg/security" "github.com/spf13/cobra" - "go.uber.org/zap" ) // captureTaskStatus holds capture task status. @@ -79,60 +75,60 @@ func (o *queryChangefeedOptions) complete(f factory.Factory) error { // run the `cli changefeed query` command. 
func (o *queryChangefeedOptions) run(cmd *cobra.Command) error { - ctx := context.GetDefaultContext() - - if o.simplified { - resp, err := sendOwnerChangefeedQuery(ctx, o.etcdClient, o.changefeedID, o.credential) - if err != nil { - return err - } - - cmd.Println(resp) - - return nil - } - - info, err := o.etcdClient.GetChangeFeedInfo(ctx, o.changefeedID) - if err != nil && cerror.ErrChangeFeedNotExists.NotEqual(err) { - return err - } - if info == nil { - log.Warn("This changefeed has been deleted, the residual meta data will be completely deleted within 24 hours.", zap.String("changgefeed", o.changefeedID)) - } - - status, _, err := o.etcdClient.GetChangeFeedStatus(ctx, o.changefeedID) - if err != nil && cerror.ErrChangeFeedNotExists.NotEqual(err) { - return err - } - - if err != nil && cerror.ErrChangeFeedNotExists.Equal(err) { - log.Error("This changefeed does not exist", zap.String("changefeed", o.changefeedID)) - return err - } - - taskPositions, err := o.etcdClient.GetAllTaskPositions(ctx, o.changefeedID) - if err != nil && cerror.ErrChangeFeedNotExists.NotEqual(err) { - return err - } - - var count uint64 - for _, pinfo := range taskPositions { - count += pinfo.Count - } - - processorInfos, err := o.etcdClient.GetAllTaskStatus(ctx, o.changefeedID) - if err != nil { - return err - } - - taskStatus := make([]captureTaskStatus, 0, len(processorInfos)) - for captureID, status := range processorInfos { - taskStatus = append(taskStatus, captureTaskStatus{CaptureID: captureID, TaskStatus: status}) - } - - meta := &cfMeta{Info: info, Status: status, Count: count, TaskStatus: taskStatus} - - return util.JSONPrint(cmd, meta) + //ctx := context.GetDefaultContext() + + //if o.simplified { + // resp, err := sendOwnerChangefeedQuery(ctx, o.etcdClient, o.changefeedID, o.credential) + // if err != nil { + // return err + // } + // + // cmd.Println(resp) + // + // return nil + //} + // + //info, err := o.etcdClient.GetChangeFeedInfo(ctx, o.changefeedID) + //if err != nil && cerror.ErrChangeFeedNotExists.NotEqual(err) { + // return err + //} + //if info == nil { + // log.Warn("This changefeed has been deleted, the residual meta data will be completely deleted within 24 hours.", zap.String("changgefeed", o.changefeedID)) + //} + // + //status, _, err := o.etcdClient.GetChangeFeedStatus(ctx, o.changefeedID) + //if err != nil && cerror.ErrChangeFeedNotExists.NotEqual(err) { + // return err + //} + // + //if err != nil && cerror.ErrChangeFeedNotExists.Equal(err) { + // log.Error("This changefeed does not exist", zap.String("changefeed", o.changefeedID)) + // return err + //} + // + //taskPositions, err := o.etcdClient.GetAllTaskPositions(ctx, o.changefeedID) + //if err != nil && cerror.ErrChangeFeedNotExists.NotEqual(err) { + // return err + //} + // + //var count uint64 + //for _, pinfo := range taskPositions { + // count += pinfo.Count + //} + // + //processorInfos, err := o.etcdClient.GetAllTaskStatus(ctx, o.changefeedID) + //if err != nil { + // return err + //} + // + //taskStatus := make([]captureTaskStatus, 0, len(processorInfos)) + //for captureID, status := range processorInfos { + // taskStatus = append(taskStatus, captureTaskStatus{CaptureID: captureID, TaskStatus: status}) + //} + // + //meta := &cfMeta{Info: info, Status: status, Count: count, TaskStatus: taskStatus} + + return util.JSONPrint(cmd, "") } // newCmdQueryChangefeed creates the `cli changefeed query` command. 
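The pause hunk above and the remove/resume hunks below comment out the CfID assignment because model.AdminJob.CfID now holds a model.ChangeFeedID rather than a string (the helper hunk earlier already reads job.CfID.ID and job.CfID.Namespace), while these option structs still carry only the plain changefeed ID. A sketch of how the assignment could be restored; the namespace field on the options struct is hypothetical and would have to be populated from a --namespace flag like the one added to the create command:

	// Hypothetical restore: assumes the options struct gains a namespace field.
	job := model.AdminJob{
		CfID: model.ChangeFeedID{Namespace: o.namespace, ID: o.changefeedID},
		Type: model.AdminStop,
	}

The same shape would apply to the AdminRemove and AdminResume jobs.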
diff --git a/pkg/cmd/cli/cli_changefeed_remove.go b/pkg/cmd/cli/cli_changefeed_remove.go index 2abf814bfc4..405455b60e3 100644 --- a/pkg/cmd/cli/cli_changefeed_remove.go +++ b/pkg/cmd/cli/cli_changefeed_remove.go @@ -62,7 +62,7 @@ func (o *removeChangefeedOptions) complete(f factory.Factory) error { // run the `cli changefeed remove` command. func (o *removeChangefeedOptions) run() error { job := model.AdminJob{ - CfID: o.changefeedID, + //CfID: o.changefeedID, Type: model.AdminRemove, Opts: &model.AdminJobOption{ ForceRemove: o.optForceRemove, diff --git a/pkg/cmd/cli/cli_changefeed_resume.go b/pkg/cmd/cli/cli_changefeed_resume.go index 41ec4d4e815..a1fdde0c4af 100644 --- a/pkg/cmd/cli/cli_changefeed_resume.go +++ b/pkg/cmd/cli/cli_changefeed_resume.go @@ -15,9 +15,7 @@ package cli import ( "context" - "encoding/json" - "github.com/pingcap/tiflow/cdc/api" "github.com/pingcap/tiflow/cdc/model" cmdcontext "github.com/pingcap/tiflow/pkg/cmd/context" "github.com/pingcap/tiflow/pkg/cmd/factory" @@ -74,25 +72,25 @@ func (o *resumeChangefeedOptions) complete(f factory.Factory) error { // confirmResumeChangefeedCheck prompts the user to confirm the use of a large data gap when noConfirm is turned off. func (o *resumeChangefeedOptions) confirmResumeChangefeedCheck(ctx context.Context, cmd *cobra.Command) error { - resp, err := sendOwnerChangefeedQuery(ctx, o.etcdClient, o.changefeedID, o.credential) - if err != nil { - return err - } - - info := &api.ChangefeedResp{} - err = json.Unmarshal([]byte(resp), info) - if err != nil { - return err - } - - currentPhysical, _, err := o.pdClient.GetTS(ctx) - if err != nil { - return err - } - - if !o.noConfirm { - return confirmLargeDataGap(cmd, currentPhysical, info.TSO) - } + //resp, err := sendOwnerChangefeedQuery(ctx, o.etcdClient, o.changefeedID, o.credential) + //if err != nil { + // return err + //} + // + //info := &api.ChangefeedResp{} + //err = json.Unmarshal([]byte(resp), info) + //if err != nil { + // return err + //} + // + //currentPhysical, _, err := o.pdClient.GetTS(ctx) + //if err != nil { + // return err + //} + // + //if !o.noConfirm { + // return confirmLargeDataGap(cmd, currentPhysical, info.TSO) + //} return nil } @@ -106,7 +104,7 @@ func (o *resumeChangefeedOptions) run(cmd *cobra.Command) error { } job := model.AdminJob{ - CfID: o.changefeedID, + //CfID: o.changefeedID, Type: model.AdminResume, } diff --git a/pkg/cmd/cli/cli_changefeed_statistics.go b/pkg/cmd/cli/cli_changefeed_statistics.go index c34f94ef951..8618686f75f 100644 --- a/pkg/cmd/cli/cli_changefeed_statistics.go +++ b/pkg/cmd/cli/cli_changefeed_statistics.go @@ -15,7 +15,6 @@ package cli import ( "context" - "fmt" "time" "github.com/pingcap/errors" @@ -28,7 +27,6 @@ import ( "github.com/pingcap/tiflow/pkg/etcd" "github.com/pingcap/tiflow/pkg/version" "github.com/spf13/cobra" - "github.com/tikv/client-go/v2/oracle" pd "github.com/tikv/pd/client" "go.uber.org/zap" ) @@ -112,81 +110,81 @@ func (o *statisticsChangefeedOptions) complete(f factory.Factory) error { // run cli command with etcd client func (o *statisticsChangefeedOptions) runCliWithEtcdClient(ctx context.Context, cmd *cobra.Command, lastCount *uint64, lastTime *time.Time) error { - now := time.Now() - - changefeedStatus, _, err := o.etcdClient.GetChangeFeedStatus(ctx, o.changefeedID) - if err != nil { - return err - } - - taskPositions, err := o.etcdClient.GetAllTaskPositions(ctx, o.changefeedID) - if err != nil { - return err - } - - var count uint64 - for _, pinfo := range 
taskPositions { - count += pinfo.Count - } - - ts, _, err := o.pdClient.GetTS(ctx) - if err != nil { - return err - } - - sinkGap := oracle.ExtractPhysical(changefeedStatus.ResolvedTs) - oracle.ExtractPhysical(changefeedStatus.CheckpointTs) - replicationGap := ts - oracle.ExtractPhysical(changefeedStatus.CheckpointTs) - - statistics := status{ - OPS: (count - (*lastCount)) / uint64(now.Unix()-lastTime.Unix()), - SinkGap: fmt.Sprintf("%dms", sinkGap), - ReplicationGap: fmt.Sprintf("%dms", replicationGap), - Count: count, - } - - *lastCount = count - *lastTime = now - return util.JSONPrint(cmd, statistics) -} - -// run cli command with api client -func (o *statisticsChangefeedOptions) runCliWithAPIClient(ctx context.Context, cmd *cobra.Command, lastCount *uint64, lastTime *time.Time) error { - now := time.Now() - var count uint64 - captures, err := o.apiClient.Captures().List(ctx) - if err != nil { - return err - } - - for _, capture := range *captures { - processor, err := o.apiClient.Processors().Get(ctx, o.changefeedID, capture.ID) - if err != nil { - return err - } - count += processor.Count - } - - ts, _, err := o.pdClient.GetTS(ctx) - if err != nil { - return err - } - changefeed, err := o.apiClient.Changefeeds().Get(ctx, o.changefeedID) - if err != nil { - return err - } - - sinkGap := oracle.ExtractPhysical(changefeed.ResolvedTs) - oracle.ExtractPhysical(changefeed.CheckpointTSO) - replicationGap := ts - oracle.ExtractPhysical(changefeed.CheckpointTSO) - statistics := status{ - OPS: (count - (*lastCount)) / uint64(now.Unix()-lastTime.Unix()), - SinkGap: fmt.Sprintf("%dms", sinkGap), - ReplicationGap: fmt.Sprintf("%dms", replicationGap), - Count: count, - } - - *lastCount = count - *lastTime = now - return util.JSONPrint(cmd, statistics) + // now := time.Now() + // + // changefeedStatus, _, err := o.etcdClient.GetChangeFeedStatus(ctx, o.changefeedID) + // if err != nil { + // return err + // } + // + // taskPositions, err := o.etcdClient.GetAllTaskPositions(ctx, o.changefeedID) + // if err != nil { + // return err + // } + // + // var count uint64 + // for _, pinfo := range taskPositions { + // count += pinfo.Count + // } + // + // ts, _, err := o.pdClient.GetTS(ctx) + // if err != nil { + // return err + // } + // + // sinkGap := oracle.ExtractPhysical(changefeedStatus.ResolvedTs) - oracle.ExtractPhysical(changefeedStatus.CheckpointTs) + // replicationGap := ts - oracle.ExtractPhysical(changefeedStatus.CheckpointTs) + // + // statistics := status{ + // OPS: (count - (*lastCount)) / uint64(now.Unix()-lastTime.Unix()), + // SinkGap: fmt.Sprintf("%dms", sinkGap), + // ReplicationGap: fmt.Sprintf("%dms", replicationGap), + // Count: count, + // } + // + // *lastCount = count + // *lastTime = now + // return util.JSONPrint(cmd, statistics) + //} + // + //// run cli command with api client + //func (o *statisticsChangefeedOptions) runCliWithAPIClient(ctx context.Context, cmd *cobra.Command, lastCount *uint64, lastTime *time.Time) error { + // now := time.Now() + // var count uint64 + // captures, err := o.apiClient.Captures().List(ctx) + // if err != nil { + // return err + // } + // + // for _, capture := range *captures { + // processor, err := o.apiClient.Processors().Get(ctx, o.changefeedID, capture.ID) + // if err != nil { + // return err + // } + // count += processor.Count + // } + // + // ts, _, err := o.pdClient.GetTS(ctx) + // if err != nil { + // return err + // } + // changefeed, err := o.apiClient.Changefeeds().Get(ctx, o.changefeedID) + // if err != nil { + // return err + // 
} + // + // sinkGap := oracle.ExtractPhysical(changefeed.ResolvedTs) - oracle.ExtractPhysical(changefeed.CheckpointTSO) + // replicationGap := ts - oracle.ExtractPhysical(changefeed.CheckpointTSO) + // statistics := status{ + // OPS: (count - (*lastCount)) / uint64(now.Unix()-lastTime.Unix()), + // SinkGap: fmt.Sprintf("%dms", sinkGap), + // ReplicationGap: fmt.Sprintf("%dms", replicationGap), + // Count: count, + // } + // + // *lastCount = count + // *lastTime = now + return util.JSONPrint(cmd, "statistics") } // run the `cli changefeed statistics` command. @@ -194,8 +192,8 @@ func (o *statisticsChangefeedOptions) run(cmd *cobra.Command) error { ctx := cmdcontext.GetDefaultContext() tick := time.NewTicker(time.Duration(o.interval) * time.Second) - var lastTime time.Time - var lastCount uint64 + //var lastTime time.Time + //var lastCount uint64 for { select { @@ -204,11 +202,11 @@ func (o *statisticsChangefeedOptions) run(cmd *cobra.Command) error { return err } case <-tick.C: - if o.runWithAPIClient { - _ = o.runCliWithAPIClient(ctx, cmd, &lastCount, &lastTime) - } else { - _ = o.runCliWithEtcdClient(ctx, cmd, &lastCount, &lastTime) - } + //if o.runWithAPIClient { + // _ = o.runCliWithAPIClient(ctx, cmd, &lastCount, &lastTime) + //} else { + // _ = o.runCliWithEtcdClient(ctx, cmd, &lastCount, &lastTime) + //} } } } diff --git a/pkg/cmd/cli/cli_changefeed_update.go b/pkg/cmd/cli/cli_changefeed_update.go index 31c2e8905c4..6bb7243f105 100644 --- a/pkg/cmd/cli/cli_changefeed_update.go +++ b/pkg/cmd/cli/cli_changefeed_update.go @@ -14,19 +14,14 @@ package cli import ( - "fmt" "strings" "github.com/pingcap/tiflow/pkg/etcd" - "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/pingcap/tiflow/cdc/model" - cmdcontext "github.com/pingcap/tiflow/pkg/cmd/context" "github.com/pingcap/tiflow/pkg/cmd/factory" - cerror "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/security" - "github.com/r3labs/diff" "github.com/spf13/cobra" "github.com/spf13/pflag" "go.uber.org/zap" @@ -77,65 +72,65 @@ func (o *updateChangefeedOptions) complete(f factory.Factory) error { // run the `cli changefeed update` command. func (o *updateChangefeedOptions) run(cmd *cobra.Command) error { - ctx := cmdcontext.GetDefaultContext() - - resp, err := sendOwnerChangefeedQuery(ctx, o.etcdClient, o.changefeedID, o.credential) - // if no cdc owner exists, allow user to update changefeed config - if err != nil && errors.Cause(err) != cerror.ErrOwnerNotFound { - return err - } - // Note that the correctness of the logic here depends on the return value of `/capture/owner/changefeed/query` interface. 
- // TODO: Using error codes instead of string containing judgments - if err == nil && !strings.Contains(resp, `"state": "stopped"`) { - return errors.Errorf("can only update changefeed config when it is stopped\nstatus: %s", resp) - } - - old, err := o.etcdClient.GetChangeFeedInfo(ctx, o.changefeedID) - if err != nil { - return err - } - - newInfo, err := o.applyChanges(old, cmd) - if err != nil { - return err - } - - changelog, err := diff.Diff(old, newInfo) - if err != nil { - return err - } - if len(changelog) == 0 { - cmd.Printf("changefeed config is the same with the old one, do nothing\n") - return nil - } - cmd.Printf("Diff of changefeed config:\n") - for _, change := range changelog { - cmd.Printf("%+v\n", change) - } - - if !o.commonChangefeedOptions.noConfirm { - cmd.Printf("Could you agree to apply changes above to changefeed [Y/N]\n") - var yOrN string - _, err = fmt.Scan(&yOrN) - if err != nil { - return err - } - if strings.ToLower(strings.TrimSpace(yOrN)) != "y" { - cmd.Printf("No update to changefeed.\n") - return nil - } - } - - err = o.etcdClient.SaveChangeFeedInfo(ctx, newInfo, o.changefeedID) - if err != nil { - return err - } - infoStr, err := newInfo.Marshal() - if err != nil { - return err - } - cmd.Printf("Update changefeed config successfully! "+ - "\nID: %s\nInfo: %s\n", o.changefeedID, infoStr) + //ctx := cmdcontext.GetDefaultContext() + // + //resp, err := sendOwnerChangefeedQuery(ctx, o.etcdClient, o.changefeedID, o.credential) + //// if no cdc owner exists, allow user to update changefeed config + //if err != nil && errors.Cause(err) != cerror.ErrOwnerNotFound { + // return err + //} + //// Note that the correctness of the logic here depends on the return value of `/capture/owner/changefeed/query` interface. + //// TODO: Using error codes instead of string containing judgments + //if err == nil && !strings.Contains(resp, `"state": "stopped"`) { + // return errors.Errorf("can only update changefeed config when it is stopped\nstatus: %s", resp) + //} + // + //old, err := o.etcdClient.GetChangeFeedInfo(ctx, o.changefeedID) + //if err != nil { + // return err + //} + // + //newInfo, err := o.applyChanges(old, cmd) + //if err != nil { + // return err + //} + // + //changelog, err := diff.Diff(old, newInfo) + //if err != nil { + // return err + //} + //if len(changelog) == 0 { + // cmd.Printf("changefeed config is the same with the old one, do nothing\n") + // return nil + //} + //cmd.Printf("Diff of changefeed config:\n") + //for _, change := range changelog { + // cmd.Printf("%+v\n", change) + //} + // + //if !o.commonChangefeedOptions.noConfirm { + // cmd.Printf("Could you agree to apply changes above to changefeed [Y/N]\n") + // var yOrN string + // _, err = fmt.Scan(&yOrN) + // if err != nil { + // return err + // } + // if strings.ToLower(strings.TrimSpace(yOrN)) != "y" { + // cmd.Printf("No update to changefeed.\n") + // return nil + // } + //} + // + //err = o.etcdClient.SaveChangeFeedInfo(ctx, newInfo, o.changefeedID) + //if err != nil { + // return err + //} + //infoStr, err := newInfo.Marshal() + //if err != nil { + // return err + //} + //cmd.Printf("Update changefeed config successfully! 
"+ + // "\nID: %s\nInfo: %s\n", o.changefeedID, infoStr) return nil } diff --git a/pkg/cmd/cli/cli_processor_query.go b/pkg/cmd/cli/cli_processor_query.go index 85a1eceaad2..9f84fc2bf8b 100644 --- a/pkg/cmd/cli/cli_processor_query.go +++ b/pkg/cmd/cli/cli_processor_query.go @@ -41,6 +41,7 @@ type queryProcessorOptions struct { apiClient apiv1client.APIV1Interface changefeedID string + namespace string captureID string runWithAPIClient bool } @@ -99,7 +100,7 @@ func (o *queryProcessorOptions) addFlags(cmd *cobra.Command) { // run cli cmd with etcd client func (o *queryProcessorOptions) runCliWithEtcdClient(ctx context.Context, cmd *cobra.Command) error { - _, status, err := o.etcdClient.GetTaskStatus(ctx, o.changefeedID, o.captureID) + _, status, err := o.etcdClient.GetTaskStatus(ctx, model.ChangeFeedID{Namespace: o.namespace, ID: o.changefeedID}, o.captureID) if err != nil && cerror.ErrTaskStatusNotExists.Equal(err) { return err } diff --git a/pkg/cmd/cli/cli_unsafe_resolve_lock.go b/pkg/cmd/cli/cli_unsafe_resolve_lock.go index 510705f3cf2..109f95c862e 100644 --- a/pkg/cmd/cli/cli_unsafe_resolve_lock.go +++ b/pkg/cmd/cli/cli_unsafe_resolve_lock.go @@ -18,6 +18,7 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tidb/kv" + "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/cmd/context" "github.com/pingcap/tiflow/pkg/cmd/factory" "github.com/pingcap/tiflow/pkg/txnutil" @@ -74,7 +75,7 @@ func (o *unsafeResolveLockOptions) run() error { } log.ReplaceGlobals(lg, p) txnResolver := txnutil.NewLockerResolver(o.kvStorage.(tikv.Storage), - "changefeed-client", util.RoleClient) + model.NewDefaultChangefeedID("changefeed-client"), util.RoleClient) return txnResolver.Resolve(ctx, o.regionID, o.ts) } diff --git a/pkg/cmd/server/server.go b/pkg/cmd/server/server.go index d3a336bcc9b..a4a803dc3db 100644 --- a/pkg/cmd/server/server.go +++ b/pkg/cmd/server/server.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/log" ticonfig "github.com/pingcap/tidb/config" "github.com/pingcap/tiflow/cdc" + ctx2 "github.com/pingcap/tiflow/cdc/contextutil" "github.com/pingcap/tiflow/cdc/sorter/unified" cmdcontext "github.com/pingcap/tiflow/pkg/cmd/context" "github.com/pingcap/tiflow/pkg/cmd/util" @@ -61,6 +62,7 @@ func newOptions() *options { // addFlags receives a *cobra.Command reference and binds // flags related to template printing to it. 
func (o *options) addFlags(cmd *cobra.Command) { + cmd.Flags().StringVar(&o.serverConfig.ClusterID, "cluster-id", "default", "ticdc cluster id") cmd.Flags().StringVar(&o.serverConfig.Addr, "addr", o.serverConfig.Addr, "Set the listening address") cmd.Flags().StringVar(&o.serverConfig.AdvertiseAddr, "advertise-addr", o.serverConfig.AdvertiseAddr, "Set the advertise listening address for client communication") @@ -125,9 +127,13 @@ func (o *options) run(cmd *cobra.Command) error { return errors.Annotate(err, "can not load timezone, Please specify the time zone through environment variable `TZ` or command line parameters `--tz`") } + // ticdc: enable support for multiple upstreams + o.serverConfig.Debug.ServerPdAddr = o.serverPdAddr + o.serverConfig.Debug.EnableMultiUpStream = true + config.StoreGlobalServerConfig(o.serverConfig) - ctx := ticdcutil.PutTimezoneInCtx(cmdcontext.GetDefaultContext(), tz) - ctx = ticdcutil.PutCaptureAddrInCtx(ctx, o.serverConfig.AdvertiseAddr) + ctx := ctx2.PutTimezoneInCtx(cmdcontext.GetDefaultContext(), tz) + ctx = ctx2.PutCaptureAddrInCtx(ctx, o.serverConfig.AdvertiseAddr) version.LogVersionInfo() if ticdcutil.FailpointBuild { @@ -226,6 +232,8 @@ func (o *options) complete(cmd *cobra.Command) error { "sort-dir will be set to `{data-dir}/tmp/sorter`. The sort-dir here will be no-op\n")) } cfg.Sorter.SortDir = config.DefaultSortDir + case "cluster-id": + cfg.ClusterID = o.serverConfig.ClusterID case "pd", "config": // do nothing default: diff --git a/pkg/config/debug.go b/pkg/config/debug.go index 19df3699ab3..e55adcd5bea 100644 --- a/pkg/config/debug.go +++ b/pkg/config/debug.go @@ -31,6 +31,9 @@ type DebugConfig struct { // The default value is true. EnableNewScheduler bool `toml:"enable-new-scheduler" json:"enable-new-scheduler"` Messages *MessagesConfig `toml:"messages" json:"messages"` + + EnableMultiUpStream bool `toml:"enable-multi-up-stream" json:"enable-multi-up-stream"` + ServerPdAddr string `toml:"server-pd-addr" json:"server-pd-addr"` } // ValidateAndAdjust validates and adjusts the debug configuration diff --git a/pkg/config/server_config.go b/pkg/config/server_config.go index a0cb01d1570..228630e23d5 100644 --- a/pkg/config/server_config.go +++ b/pkg/config/server_config.go @@ -126,6 +126,7 @@ var defaultServerConfig = &ServerConfig{ }, Messages: defaultMessageConfig.Clone(), }, + ClusterID: "default", } // ServerConfig represents a config for server @@ -152,6 +153,7 @@ type ServerConfig struct { PerTableMemoryQuota uint64 `toml:"per-table-memory-quota" json:"per-table-memory-quota"` KVClient *KVClientConfig `toml:"kv-client" json:"kv-client"` Debug *DebugConfig `toml:"debug" json:"debug"` + ClusterID string `toml:"cluster-id" json:"cluster-id"` } // Marshal returns the json marshal format of a ServerConfig diff --git a/pkg/context/context.go b/pkg/context/context.go index db8bac2bf6b..dbe1cdf3c62 100644 --- a/pkg/context/context.go +++ b/pkg/context/context.go @@ -205,7 +205,7 @@ func NewContext4Test(baseCtx context.Context, withChangefeedVars bool) Context { }) if withChangefeedVars { ctx = WithChangefeedVars(ctx, &ChangefeedVars{ - ID: "changefeed-id-test", + ID: model.ChangeFeedID{"default", "changefeed-id-test"}, Info: &model.ChangeFeedInfo{ StartTs: oracle.GoTimeToTS(time.Now()), Config: config.GetDefaultReplicaConfig(), @@ -228,5 +228,5 @@ func ZapFieldCapture(ctx Context) zap.Field { // ZapFieldChangefeed returns a zap field containing changefeed id func ZapFieldChangefeed(ctx Context) zap.Field { - return 
zap.String("changefeed", ctx.ChangefeedVars().ID.ID) } diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index fb638119985..220c581f62b 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -37,48 +37,52 @@ import ( ) const ( - // CaptureOwnerKey is the capture owner path that is saved to etcd - CaptureOwnerKey = EtcdKeyBase + "/owner" - // CaptureInfoKeyPrefix is the capture info path that is saved to etcd - CaptureInfoKeyPrefix = EtcdKeyBase + "/capture" - // TaskKeyPrefix is the prefix of task keys - TaskKeyPrefix = EtcdKeyBase + "/task" - // TaskStatusKeyPrefix is the prefix of task status keys - TaskStatusKeyPrefix = TaskKeyPrefix + "/status" - // TaskPositionKeyPrefix is the prefix of task position keys - TaskPositionKeyPrefix = TaskKeyPrefix + "/position" - // JobKeyPrefix is the prefix of job keys - JobKeyPrefix = EtcdKeyBase + "/job" + +// CaptureInfoKeyPrefix is the capture info path that is saved to etcd +//CaptureInfoKeyPrefix = EtcdKeyBase + "/capture" +// TaskKeyPrefix is the prefix of task keys +//TaskKeyPrefix = EtcdKeyBase + "/task" +// TaskStatusKeyPrefix is the prefix of task status keys +//TaskStatusKeyPrefix = TaskKeyPrefix + "/status" +// TaskPositionKeyPrefix is the prefix of task position keys +//TaskPositionKeyPrefix = TaskKeyPrefix + "/position" +// JobKeyPrefix is the prefix of job keys +//JobKeyPrefix = EtcdKeyBase + "/job" ) +// CaptureOwnerKey is the capture owner path that is saved to etcd +func CaptureOwnerKey() string { + return EtcdKeyBase() + CDCMetaPrefix + ownerKey +} + // GetEtcdKeyChangeFeedList returns the prefix key of all changefeed config -func GetEtcdKeyChangeFeedList() string { - return fmt.Sprintf("%s/changefeed/info", EtcdKeyBase) +func GetEtcdKeyChangeFeedList(namespace string) string { + return fmt.Sprintf("%s/changefeed/info", NamespacedPrefix(namespace)) } // GetEtcdKeyChangeFeedInfo returns the key of a changefeed config -func GetEtcdKeyChangeFeedInfo(changefeedID string) string { - return fmt.Sprintf("%s/%s", GetEtcdKeyChangeFeedList(), changefeedID) +func GetEtcdKeyChangeFeedInfo(changefeedID model.ChangeFeedID) string { + return fmt.Sprintf("%s/%s", GetEtcdKeyChangeFeedList(changefeedID.Namespace), changefeedID.ID) } // GetEtcdKeyTaskPosition returns the key of a task position -func GetEtcdKeyTaskPosition(changefeedID, captureID string) string { - return TaskPositionKeyPrefix + "/" + captureID + "/" + changefeedID +func GetEtcdKeyTaskPosition(changefeedID model.ChangeFeedID, captureID string) string { + return NamespacedPrefix(changefeedID.Namespace) + "/" + captureID + "/" + changefeedID.ID } // GetEtcdKeyCaptureInfo returns the key of a capture info func GetEtcdKeyCaptureInfo(id string) string { - return CaptureInfoKeyPrefix + "/" + id + return CaptureInfoKeyPrefix() + "/" + id } // GetEtcdKeyTaskStatus returns the key for the task status -func GetEtcdKeyTaskStatus(changeFeedID, captureID string) string { - return TaskStatusKeyPrefix + "/" + captureID + "/" + changeFeedID +func GetEtcdKeyTaskStatus(changeFeedID model.ChangeFeedID, captureID string) string { + return NamespacedPrefix(changeFeedID.Namespace) + taskKey + "/" + captureID + "/" + changeFeedID.ID } // GetEtcdKeyJob returns the key for a job status -func GetEtcdKeyJob(changeFeedID string) string { - return JobKeyPrefix + "/" + changeFeedID +func GetEtcdKeyJob(changeFeedID model.ChangeFeedID) string { + return NamespacedPrefix(changeFeedID.Namespace) + jobKey + "/" + changeFeedID.ID } // CDCEtcdClient is a wrap of etcd client @@ -106,13 +110,13 @@ func (c CDCEtcdClient) 
Close() error { // ClearAllCDCInfo delete all keys created by CDC func (c CDCEtcdClient) ClearAllCDCInfo(ctx context.Context) error { - _, err := c.Client.Delete(ctx, EtcdKeyBase, clientv3.WithPrefix()) + _, err := c.Client.Delete(ctx, EtcdKeyBase(), clientv3.WithPrefix()) return cerror.WrapError(cerror.ErrPDEtcdAPIError, err) } // GetAllCDCInfo get all keys created by CDC func (c CDCEtcdClient) GetAllCDCInfo(ctx context.Context) ([]*mvccpb.KeyValue, error) { - resp, err := c.Client.Get(ctx, EtcdKeyBase, clientv3.WithPrefix()) + resp, err := c.Client.Get(ctx, EtcdKeyBase(), clientv3.WithPrefix()) if err != nil { return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) } @@ -121,22 +125,23 @@ func (c CDCEtcdClient) GetAllCDCInfo(ctx context.Context) ([]*mvccpb.KeyValue, e // GetChangeFeeds returns kv revision and a map mapping from changefeedID to changefeed detail mvccpb.KeyValue func (c CDCEtcdClient) GetChangeFeeds(ctx context.Context) (int64, map[string]*mvccpb.KeyValue, error) { - key := GetEtcdKeyChangeFeedList() - - resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix()) - if err != nil { - return 0, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) - } - revision := resp.Header.Revision - details := make(map[string]*mvccpb.KeyValue, resp.Count) - for _, kv := range resp.Kvs { - id, err := extractKeySuffix(string(kv.Key)) - if err != nil { - return 0, nil, err - } - details[id] = kv - } - return revision, details, nil + //key := GetEtcdKeyChangeFeedList() + // + //resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix()) + //if err != nil { + // return 0, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) + //} + //revision := resp.Header.Revision + //details := make(map[string]*mvccpb.KeyValue, resp.Count) + //for _, kv := range resp.Kvs { + // id, err := extractKeySuffix(string(kv.Key)) + // if err != nil { + // return 0, nil, err + // } + // details[id] = kv + //} + //return revision, details, nil + return 0, nil, nil } // GetAllChangeFeedInfo queries all changefeed information @@ -158,7 +163,7 @@ func (c CDCEtcdClient) GetAllChangeFeedInfo(ctx context.Context) (map[string]*mo } // GetChangeFeedInfo queries the config of a given changefeed -func (c CDCEtcdClient) GetChangeFeedInfo(ctx context.Context, id string) (*model.ChangeFeedInfo, error) { +func (c CDCEtcdClient) GetChangeFeedInfo(ctx context.Context, id model.ChangeFeedID) (*model.ChangeFeedInfo, error) { key := GetEtcdKeyChangeFeedInfo(id) resp, err := c.Client.Get(ctx, key) if err != nil { @@ -173,7 +178,7 @@ func (c CDCEtcdClient) GetChangeFeedInfo(ctx context.Context, id string) (*model } // DeleteChangeFeedInfo deletes a changefeed config from etcd -func (c CDCEtcdClient) DeleteChangeFeedInfo(ctx context.Context, id string) error { +func (c CDCEtcdClient) DeleteChangeFeedInfo(ctx context.Context, id model.ChangeFeedID) error { key := GetEtcdKeyChangeFeedInfo(id) _, err := c.Client.Delete(ctx, key) return cerror.WrapError(cerror.ErrPDEtcdAPIError, err) @@ -181,29 +186,30 @@ func (c CDCEtcdClient) DeleteChangeFeedInfo(ctx context.Context, id string) erro // GetAllChangeFeedStatus queries all changefeed job status func (c CDCEtcdClient) GetAllChangeFeedStatus(ctx context.Context) (map[string]*model.ChangeFeedStatus, error) { - key := JobKeyPrefix - resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix()) - if err != nil { - return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) - } - statuses := make(map[string]*model.ChangeFeedStatus, resp.Count) - for _, rawKv := range resp.Kvs { - changefeedID, err := 
extractKeySuffix(string(rawKv.Key)) - if err != nil { - return nil, err - } - status := &model.ChangeFeedStatus{} - err = status.Unmarshal(rawKv.Value) - if err != nil { - return nil, errors.Trace(err) - } - statuses[changefeedID] = status - } - return statuses, nil + //key := JobKeyPrefix() + //resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix()) + //if err != nil { + // return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) + //} + //statuses := make(map[string]*model.ChangeFeedStatus, resp.Count) + //for _, rawKv := range resp.Kvs { + // changefeedID, err := extractKeySuffix(string(rawKv.Key)) + // if err != nil { + // return nil, err + // } + // status := &model.ChangeFeedStatus{} + // err = status.Unmarshal(rawKv.Value) + // if err != nil { + // return nil, errors.Trace(err) + // } + // statuses[changefeedID] = status + //} + //return statuses, nil + return nil, nil } // GetChangeFeedStatus queries the checkpointTs and resovledTs of a given changefeed -func (c CDCEtcdClient) GetChangeFeedStatus(ctx context.Context, id string) (*model.ChangeFeedStatus, int64, error) { +func (c CDCEtcdClient) GetChangeFeedStatus(ctx context.Context, id model.ChangeFeedID) (*model.ChangeFeedStatus, int64, error) { key := GetEtcdKeyJob(id) resp, err := c.Client.Get(ctx, key) if err != nil { @@ -219,7 +225,7 @@ func (c CDCEtcdClient) GetChangeFeedStatus(ctx context.Context, id string) (*mod // GetCaptures returns kv revision and CaptureInfo list func (c CDCEtcdClient) GetCaptures(ctx context.Context) (int64, []*model.CaptureInfo, error) { - key := CaptureInfoKeyPrefix + key := CaptureInfoKeyPrefix() resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix()) if err != nil { @@ -261,9 +267,13 @@ func (c CDCEtcdClient) GetCaptureInfo(ctx context.Context, id string) (info *mod return } +func CaptureInfoKeyPrefix() string { + return EtcdKeyBase() + CDCMetaPrefix + captureKey +} + // GetCaptureLeases returns a map mapping from capture ID to its lease func (c CDCEtcdClient) GetCaptureLeases(ctx context.Context) (map[string]int64, error) { - key := CaptureInfoKeyPrefix + key := CaptureInfoKeyPrefix() resp, err := c.Client.Get(ctx, key, clientv3.WithPrefix()) if err != nil { @@ -296,20 +306,46 @@ func (c CDCEtcdClient) RevokeAllLeases(ctx context.Context, leases map[string]in } // CreateChangefeedInfo creates a change feed info into etcd and fails if it is already exists. -func (c CDCEtcdClient) CreateChangefeedInfo(ctx context.Context, info *model.ChangeFeedInfo, changeFeedID string) error { +func (c CDCEtcdClient) CreateChangefeedInfo(ctx context.Context, cdcClusterID string, upstreamInfo *model.UpstreamInfo, info *model.ChangeFeedInfo, changeFeedID model.ChangeFeedID) error { infoKey := GetEtcdKeyChangeFeedInfo(changeFeedID) jobKey := GetEtcdKeyJob(changeFeedID) value, err := info.Marshal() if err != nil { return errors.Trace(err) } - - cmps := []clientv3.Cmp{ - clientv3.Compare(clientv3.ModRevision(infoKey), "=", 0), - clientv3.Compare(clientv3.ModRevision(jobKey), "=", 0), + upstram, err := upstreamInfo.Marshal() + if err != nil { + return errors.Trace(err) } - opsThen := []clientv3.Op{ - clientv3.OpPut(infoKey, value), + + upstreamKey := CDCClusterBase(cdcClusterID) + CDCMetaPrefix + upstreamInfoKey + "/" + info.UpstreamID + + getresp, err := c.Client.Get(ctx, upstreamKey, clientv3.WithFirstCreate()...) 
+ if err != nil { + return err + } + hasUpstream := len(getresp.Kvs) > 0 + var cmps []clientv3.Cmp + var opsThen []clientv3.Op + if hasUpstream { //already has an upstream config in etcd + cmps = []clientv3.Cmp{ + clientv3.Compare(clientv3.ModRevision(infoKey), "=", 0), + clientv3.Compare(clientv3.ModRevision(jobKey), "=", 0), + clientv3.Compare(clientv3.Value(upstreamKey), "!=", ""), + } + opsThen = []clientv3.Op{ + clientv3.OpPut(infoKey, value), + } + } else { + cmps = []clientv3.Cmp{ + clientv3.Compare(clientv3.ModRevision(infoKey), "=", 0), + clientv3.Compare(clientv3.ModRevision(jobKey), "=", 0), + clientv3.Compare(clientv3.ModRevision(upstreamKey), "=", 0), + } + opsThen = []clientv3.Op{ + clientv3.OpPut(upstreamKey, string(upstram)), + clientv3.OpPut(infoKey, value), + } } resp, err := c.Client.Txn(ctx, cmps, opsThen, TxnEmptyOpsElse) if err != nil { @@ -317,7 +353,7 @@ func (c CDCEtcdClient) CreateChangefeedInfo(ctx context.Context, info *model.Cha } if !resp.Succeeded { log.Warn("changefeed already exists, ignore create changefeed", - zap.String("changefeed", changeFeedID)) + zap.String("changefeed", changeFeedID.String())) return cerror.ErrChangeFeedAlreadyExists.GenWithStackByArgs(changeFeedID) } return errors.Trace(err) @@ -325,7 +361,7 @@ func (c CDCEtcdClient) CreateChangefeedInfo(ctx context.Context, info *model.Cha // SaveChangeFeedInfo stores change feed info into etcd // TODO: this should be called from outer system, such as from a TiDB client -func (c CDCEtcdClient) SaveChangeFeedInfo(ctx context.Context, info *model.ChangeFeedInfo, changeFeedID string) error { +func (c CDCEtcdClient) SaveChangeFeedInfo(ctx context.Context, info *model.ChangeFeedInfo, changeFeedID model.ChangeFeedID) error { key := GetEtcdKeyChangeFeedInfo(changeFeedID) value, err := info.Marshal() if err != nil { @@ -338,60 +374,62 @@ func (c CDCEtcdClient) SaveChangeFeedInfo(ctx context.Context, info *model.Chang // GetProcessors queries all processors of the cdc cluster, // and returns a slice of ProcInfoSnap(without table info) func (c CDCEtcdClient) GetProcessors(ctx context.Context) ([]*model.ProcInfoSnap, error) { - resp, err := c.Client.Get(ctx, TaskStatusKeyPrefix, clientv3.WithPrefix()) - if err != nil { - return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) - } - infos := make([]*model.ProcInfoSnap, 0, resp.Count) - for _, rawKv := range resp.Kvs { - changefeedID, err := extractKeySuffix(string(rawKv.Key)) - if err != nil { - return nil, err - } - endIndex := len(rawKv.Key) - len(changefeedID) - 1 - captureID, err := extractKeySuffix(string(rawKv.Key[0:endIndex])) - if err != nil { - return nil, err - } - info := &model.ProcInfoSnap{ - CfID: changefeedID, - CaptureID: captureID, - } - infos = append(infos, info) - } - return infos, nil + return nil, nil + //resp, err := c.Client.Get(ctx, TaskStatusKeyPrefix, clientv3.WithPrefix()) + //if err != nil { + // return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) + //} + //infos := make([]*model.ProcInfoSnap, 0, resp.Count) + //for _, rawKv := range resp.Kvs { + // changefeedID, err := extractKeySuffix(string(rawKv.Key)) + // if err != nil { + // return nil, err + // } + // endIndex := len(rawKv.Key) - len(changefeedID) - 1 + // captureID, err := extractKeySuffix(string(rawKv.Key[0:endIndex])) + // if err != nil { + // return nil, err + // } + // info := &model.ProcInfoSnap{ + // CfID: changefeedID, + // CaptureID: captureID, + // } + // infos = append(infos, info) + //} + //return infos, nil } // GetAllTaskStatus queries all task 
status of a changefeed, and returns a map // mapping from captureID to TaskStatus func (c CDCEtcdClient) GetAllTaskStatus(ctx context.Context, changefeedID string) (model.ProcessorsInfos, error) { - resp, err := c.Client.Get(ctx, TaskStatusKeyPrefix, clientv3.WithPrefix()) - if err != nil { - return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) - } - pinfo := make(map[string]*model.TaskStatus, resp.Count) - for _, rawKv := range resp.Kvs { - changeFeed, err := extractKeySuffix(string(rawKv.Key)) - if err != nil { - return nil, err - } - endIndex := len(rawKv.Key) - len(changeFeed) - 1 - captureID, err := extractKeySuffix(string(rawKv.Key[0:endIndex])) - if err != nil { - return nil, err - } - if changeFeed != changefeedID { - continue - } - info := &model.TaskStatus{} - err = info.Unmarshal(rawKv.Value) - if err != nil { - return nil, cerror.ErrDecodeFailed.GenWithStackByArgs("failed to unmarshal task status: %s", err) - } - info.ModRevision = rawKv.ModRevision - pinfo[captureID] = info - } - return pinfo, nil + //resp, err := c.Client.Get(ctx, TaskStatusKeyPrefix, clientv3.WithPrefix()) + //if err != nil { + // return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) + //} + //pinfo := make(map[string]*model.TaskStatus, resp.Count) + //for _, rawKv := range resp.Kvs { + // changeFeed, err := extractKeySuffix(string(rawKv.Key)) + // if err != nil { + // return nil, err + // } + // endIndex := len(rawKv.Key) - len(changeFeed) - 1 + // captureID, err := extractKeySuffix(string(rawKv.Key[0:endIndex])) + // if err != nil { + // return nil, err + // } + // if changeFeed != changefeedID { + // continue + // } + // info := &model.TaskStatus{} + // err = info.Unmarshal(rawKv.Value) + // if err != nil { + // return nil, cerror.ErrDecodeFailed.GenWithStackByArgs("failed to unmarshal task status: %s", err) + // } + // info.ModRevision = rawKv.ModRevision + // pinfo[captureID] = info + //} + //return pinfo, nil + return nil, nil } // GetTaskStatus queries task status from etcd, returns @@ -400,7 +438,7 @@ func (c CDCEtcdClient) GetAllTaskStatus(ctx context.Context, changefeedID string // - error if error happens func (c CDCEtcdClient) GetTaskStatus( ctx context.Context, - changefeedID string, + changefeedID model.ChangeFeedID, captureID string, ) (int64, *model.TaskStatus, error) { key := GetEtcdKeyTaskStatus(changefeedID, captureID) @@ -419,7 +457,7 @@ func (c CDCEtcdClient) GetTaskStatus( // PutTaskStatus puts task status into etcd. 
func (c CDCEtcdClient) PutTaskStatus( ctx context.Context, - changefeedID string, + changefeedID model.ChangeFeedID, captureID string, info *model.TaskStatus, ) error { @@ -441,32 +479,33 @@ func (c CDCEtcdClient) PutTaskStatus( // GetAllTaskPositions queries all task positions of a changefeed, and returns a map // mapping from captureID to TaskPositions func (c CDCEtcdClient) GetAllTaskPositions(ctx context.Context, changefeedID string) (map[string]*model.TaskPosition, error) { - resp, err := c.Client.Get(ctx, TaskPositionKeyPrefix, clientv3.WithPrefix()) - if err != nil { - return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) - } - positions := make(map[string]*model.TaskPosition, resp.Count) - for _, rawKv := range resp.Kvs { - changeFeed, err := extractKeySuffix(string(rawKv.Key)) - if err != nil { - return nil, err - } - endIndex := len(rawKv.Key) - len(changeFeed) - 1 - captureID, err := extractKeySuffix(string(rawKv.Key[0:endIndex])) - if err != nil { - return nil, err - } - if changeFeed != changefeedID { - continue - } - info := &model.TaskPosition{} - err = info.Unmarshal(rawKv.Value) - if err != nil { - return nil, cerror.ErrDecodeFailed.GenWithStackByArgs("failed to unmarshal task position: %s", err) - } - positions[captureID] = info - } - return positions, nil + //resp, err := c.Client.Get(ctx, TaskPositionKeyPrefix(), clientv3.WithPrefix()) + //if err != nil { + // return nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) + //} + //positions := make(map[string]*model.TaskPosition, resp.Count) + //for _, rawKv := range resp.Kvs { + // changeFeed, err := extractKeySuffix(string(rawKv.Key)) + // if err != nil { + // return nil, err + // } + // endIndex := len(rawKv.Key) - len(changeFeed) - 1 + // captureID, err := extractKeySuffix(string(rawKv.Key[0:endIndex])) + // if err != nil { + // return nil, err + // } + // if changeFeed != changefeedID { + // continue + // } + // info := &model.TaskPosition{} + // err = info.Unmarshal(rawKv.Value) + // if err != nil { + // return nil, cerror.ErrDecodeFailed.GenWithStackByArgs("failed to unmarshal task position: %s", err) + // } + // positions[captureID] = info + //} + //return positions, nil + return nil, nil } // GetTaskPosition queries task process from etcd, returns @@ -478,17 +517,18 @@ func (c CDCEtcdClient) GetTaskPosition( changefeedID string, captureID string, ) (int64, *model.TaskPosition, error) { - key := GetEtcdKeyTaskPosition(changefeedID, captureID) - resp, err := c.Client.Get(ctx, key) - if err != nil { - return 0, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) - } - if resp.Count == 0 { - return 0, nil, cerror.ErrTaskPositionNotExists.GenWithStackByArgs(key) - } - info := &model.TaskPosition{} - err = info.Unmarshal(resp.Kvs[0].Value) - return resp.Kvs[0].ModRevision, info, errors.Trace(err) + return 0, nil, nil + //key := GetEtcdKeyTaskPosition(changefeedID, captureID) + //resp, err := c.Client.Get(ctx, key) + //if err != nil { + // return 0, nil, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) + //} + //if resp.Count == 0 { + // return 0, nil, cerror.ErrTaskPositionNotExists.GenWithStackByArgs(key) + //} + //info := &model.TaskPosition{} + //err = info.Unmarshal(resp.Kvs[0].Value) + //return resp.Kvs[0].ModRevision, info, errors.Trace(err) } // PutTaskPositionOnChange puts task position information into etcd if the @@ -496,7 +536,7 @@ func (c CDCEtcdClient) GetTaskPosition( // returns true if task position is written to etcd. 
func (c CDCEtcdClient) PutTaskPositionOnChange( ctx context.Context, - changefeedID string, + changefeedID model.ChangeFeedID, captureID string, info *model.TaskPosition, ) (bool, error) { @@ -523,7 +563,7 @@ func (c CDCEtcdClient) PutTaskPositionOnChange( // PutChangeFeedStatus puts changefeed synchronization status into etcd func (c CDCEtcdClient) PutChangeFeedStatus( ctx context.Context, - changefeedID string, + changefeedID model.ChangeFeedID, status *model.ChangeFeedStatus, ) error { key := GetEtcdKeyJob(changefeedID) @@ -569,7 +609,7 @@ func (c CDCEtcdClient) GetOwnerID(ctx context.Context, key string) (string, erro // GetOwnerRevision gets the Etcd revision for the elected owner. func (c CDCEtcdClient) GetOwnerRevision(ctx context.Context, captureID string) (rev int64, err error) { - resp, err := c.Client.Get(ctx, CaptureOwnerKey, clientv3.WithFirstCreate()...) + resp, err := c.Client.Get(ctx, CaptureOwnerKey(), clientv3.WithFirstCreate()...) if err != nil { return 0, cerror.WrapError(cerror.ErrPDEtcdAPIError, err) } diff --git a/pkg/etcd/etcdkey.go b/pkg/etcd/etcdkey.go index b08d14758e9..5f6e84151e5 100644 --- a/pkg/etcd/etcdkey.go +++ b/pkg/etcd/etcdkey.go @@ -14,17 +14,20 @@ package etcd import ( + "fmt" "strings" "github.com/pingcap/log" + "github.com/pingcap/tiflow/pkg/config" cerror "github.com/pingcap/tiflow/pkg/errors" ) const ( + CDCMetaPrefix = "/__cdc_meta__" // EtcdKeyBase is the common prefix of the keys in CDC - EtcdKeyBase = "/tidb/cdc" - ownerKey = "/owner" - captureKey = "/capture" + //EtcdKeyBase = "/tidb/cdc" + ownerKey = "/owner" + captureKey = "/capture" taskKey = "/task" taskWorkloadKey = taskKey + "/workload" @@ -33,6 +36,7 @@ const ( changefeedInfoKey = "/changefeed/info" jobKey = "/job" + upstreamInfoKey = "/upstream" ) // CDCKeyType is the type of etcd key @@ -48,6 +52,7 @@ const ( CDCKeyTypeTaskPosition CDCKeyTypeTaskStatus CDCKeyTypeTaskWorkload + CDCKeyTypeUpstream ) // CDCKey represents a etcd key which is defined by TiCDC @@ -75,95 +80,130 @@ const ( */ type CDCKey struct { - Tp CDCKeyType - ChangefeedID string - CaptureID string - OwnerLeaseID string + Tp CDCKeyType + ChangefeedID string + CaptureID string + OwnerLeaseID string + Namespace string + ClusterID string + UpstreamClusterID string } // Parse parses the given etcd key func (k *CDCKey) Parse(key string) error { - if !strings.HasPrefix(key, EtcdKeyBase) { + if !strings.HasPrefix(key, EtcdKeyBase()) { return cerror.ErrInvalidEtcdKey.GenWithStackByArgs(key) } - key = key[len(EtcdKeyBase):] - switch { - case strings.HasPrefix(key, ownerKey): - k.Tp = CDCKeyTypeOwner - k.CaptureID = "" - k.ChangefeedID = "" - key = key[len(ownerKey):] - if len(key) > 0 { - key = key[1:] - } - k.OwnerLeaseID = key - case strings.HasPrefix(key, captureKey): - k.Tp = CDCKeyTypeCapture - k.CaptureID = key[len(captureKey)+1:] - k.ChangefeedID = "" - k.OwnerLeaseID = "" - case strings.HasPrefix(key, changefeedInfoKey): - k.Tp = CDCKeyTypeChangefeedInfo - k.CaptureID = "" - k.ChangefeedID = key[len(changefeedInfoKey)+1:] - k.OwnerLeaseID = "" - case strings.HasPrefix(key, jobKey): - k.Tp = CDCKeyTypeChangeFeedStatus - k.CaptureID = "" - k.ChangefeedID = key[len(jobKey)+1:] - k.OwnerLeaseID = "" - case strings.HasPrefix(key, taskStatusKey): - splitKey := strings.SplitN(key[len(taskStatusKey)+1:], "/", 2) - if len(splitKey) != 2 { - return cerror.ErrInvalidEtcdKey.GenWithStackByArgs(key) - } - k.Tp = CDCKeyTypeTaskStatus - k.CaptureID = splitKey[0] - k.ChangefeedID = splitKey[1] - k.OwnerLeaseID = "" - case 
strings.HasPrefix(key, taskPositionKey): - splitKey := strings.SplitN(key[len(taskPositionKey)+1:], "/", 2) - if len(splitKey) != 2 { + key = key[len("/tidb/cdc"):] + parts := strings.Split(key, "/") + k.ClusterID = parts[1] + key = key[len(k.ClusterID)+1:] + if strings.HasPrefix(key, CDCMetaPrefix) { + key = key[len(CDCMetaPrefix):] + switch { + case strings.HasPrefix(key, ownerKey): + k.Tp = CDCKeyTypeOwner + k.CaptureID = "" + k.ChangefeedID = "" + key = key[len(ownerKey):] + if len(key) > 0 { + key = key[1:] + } + k.OwnerLeaseID = key + case strings.HasPrefix(key, captureKey): + k.Tp = CDCKeyTypeCapture + k.CaptureID = key[len(captureKey)+1:] + k.ChangefeedID = "" + k.OwnerLeaseID = "" + case strings.HasPrefix(key, upstreamInfoKey): + k.Tp = CDCKeyTypeUpstream + k.CaptureID = "" + k.UpstreamClusterID = key[len(upstreamInfoKey)+1:] + k.OwnerLeaseID = "" + default: return cerror.ErrInvalidEtcdKey.GenWithStackByArgs(key) } - k.Tp = CDCKeyTypeTaskPosition - k.CaptureID = splitKey[0] - k.ChangefeedID = splitKey[1] - k.OwnerLeaseID = "" - case strings.HasPrefix(key, taskWorkloadKey): - splitKey := strings.SplitN(key[len(taskWorkloadKey)+1:], "/", 2) - if len(splitKey) != 2 { + } else { + k.Namespace = parts[2] + key = key[len(k.Namespace)+1:] + switch { + case strings.HasPrefix(key, changefeedInfoKey): + k.Tp = CDCKeyTypeChangefeedInfo + k.CaptureID = "" + k.ChangefeedID = key[len(changefeedInfoKey)+1:] + k.OwnerLeaseID = "" + case strings.HasPrefix(key, jobKey): + k.Tp = CDCKeyTypeChangeFeedStatus + k.CaptureID = "" + k.ChangefeedID = key[len(jobKey)+1:] + k.OwnerLeaseID = "" + case strings.HasPrefix(key, taskStatusKey): + splitKey := strings.SplitN(key[len(taskStatusKey)+1:], "/", 2) + if len(splitKey) != 2 { + return cerror.ErrInvalidEtcdKey.GenWithStackByArgs(key) + } + k.Tp = CDCKeyTypeTaskStatus + k.CaptureID = splitKey[0] + k.ChangefeedID = splitKey[1] + k.OwnerLeaseID = "" + case strings.HasPrefix(key, taskPositionKey): + splitKey := strings.SplitN(key[len(taskPositionKey)+1:], "/", 2) + if len(splitKey) != 2 { + return cerror.ErrInvalidEtcdKey.GenWithStackByArgs(key) + } + k.Tp = CDCKeyTypeTaskPosition + k.CaptureID = splitKey[0] + k.ChangefeedID = splitKey[1] + k.OwnerLeaseID = "" + case strings.HasPrefix(key, taskWorkloadKey): + splitKey := strings.SplitN(key[len(taskWorkloadKey)+1:], "/", 2) + if len(splitKey) != 2 { + return cerror.ErrInvalidEtcdKey.GenWithStackByArgs(key) + } + k.Tp = CDCKeyTypeTaskWorkload + k.CaptureID = splitKey[0] + k.ChangefeedID = splitKey[1] + k.OwnerLeaseID = "" + default: return cerror.ErrInvalidEtcdKey.GenWithStackByArgs(key) } - k.Tp = CDCKeyTypeTaskWorkload - k.CaptureID = splitKey[0] - k.ChangefeedID = splitKey[1] - k.OwnerLeaseID = "" - default: - return cerror.ErrInvalidEtcdKey.GenWithStackByArgs(key) } return nil } +func CDCClusterBase(id string) string { + return fmt.Sprintf("/tidb/cdc/%s", id) +} + +func EtcdKeyBase() string { + return fmt.Sprintf("/tidb/cdc/%s", config.GetGlobalServerConfig().ClusterID) +} + +func NamespacedPrefix(namespace string) string { + return EtcdKeyBase() + "/" + namespace +} + func (k *CDCKey) String() string { switch k.Tp { case CDCKeyTypeOwner: if len(k.OwnerLeaseID) == 0 { - return EtcdKeyBase + ownerKey + return EtcdKeyBase() + CDCMetaPrefix + ownerKey } - return EtcdKeyBase + ownerKey + "/" + k.OwnerLeaseID + return EtcdKeyBase() + CDCMetaPrefix + ownerKey + "/" + k.OwnerLeaseID case CDCKeyTypeCapture: - return EtcdKeyBase + captureKey + "/" + k.CaptureID + return EtcdKeyBase() + CDCMetaPrefix + captureKey + "/" 
+ k.CaptureID case CDCKeyTypeChangefeedInfo: - return EtcdKeyBase + changefeedInfoKey + "/" + k.ChangefeedID + return NamespacedPrefix(k.Namespace) + changefeedInfoKey + "/" + k.ChangefeedID case CDCKeyTypeChangeFeedStatus: - return EtcdKeyBase + jobKey + "/" + k.ChangefeedID + return NamespacedPrefix(k.Namespace) + jobKey + "/" + k.ChangefeedID case CDCKeyTypeTaskPosition: - return EtcdKeyBase + taskPositionKey + "/" + k.CaptureID + "/" + k.ChangefeedID + return NamespacedPrefix(k.Namespace) + taskPositionKey + "/" + k.CaptureID + "/" + k.ChangefeedID case CDCKeyTypeTaskStatus: - return EtcdKeyBase + taskStatusKey + "/" + k.CaptureID + "/" + k.ChangefeedID + return NamespacedPrefix(k.Namespace) + taskStatusKey + "/" + k.CaptureID + "/" + k.ChangefeedID case CDCKeyTypeTaskWorkload: - return EtcdKeyBase + taskWorkloadKey + "/" + k.CaptureID + "/" + k.ChangefeedID + return NamespacedPrefix(k.Namespace) + taskWorkloadKey + "/" + k.CaptureID + "/" + k.ChangefeedID + case CDCKeyTypeUpstream: + return NamespacedPrefix(k.Namespace) + upstreamInfoKey + "/" + k.UpstreamClusterID } log.Panic("unreachable") return "" diff --git a/pkg/orchestrator/etcd_worker.go b/pkg/orchestrator/etcd_worker.go index d7b23a3c760..814f3e35c1d 100644 --- a/pkg/orchestrator/etcd_worker.go +++ b/pkg/orchestrator/etcd_worker.go @@ -41,7 +41,7 @@ const ( // When EtcdWorker commits a txn to etcd or ticks its reactor // takes more than etcdWorkerLogsWarnDuration, it will print a log etcdWorkerLogsWarnDuration = 1 * time.Second - deletionCounterKey = "/meta/ticdc-delete-etcd-key-count" + deletionCounterKey = "/__cdc_meta__/meta/ticdc-delete-etcd-key-count" ) // EtcdWorker handles all interactions with Etcd diff --git a/pkg/orchestrator/reactor_state.go b/pkg/orchestrator/reactor_state.go index 82f6c0dbe52..3c48c08bdaf 100644 --- a/pkg/orchestrator/reactor_state.go +++ b/pkg/orchestrator/reactor_state.go @@ -31,6 +31,7 @@ type GlobalReactorState struct { Owner map[string]struct{} Captures map[model.CaptureID]*model.CaptureInfo Changefeeds map[model.ChangeFeedID]*ChangefeedReactorState + Upstreams map[string]*model.UpstreamInfo pendingPatches [][]DataPatch // onCaptureAdded and onCaptureRemoved are hook functions @@ -45,6 +46,7 @@ func NewGlobalState() *GlobalReactorState { Owner: map[string]struct{}{}, Captures: make(map[model.CaptureID]*model.CaptureInfo), Changefeeds: make(map[model.ChangeFeedID]*ChangefeedReactorState), + Upstreams: map[string]*model.UpstreamInfo{}, } } @@ -92,21 +94,46 @@ func (s *GlobalReactorState) Update(key util.EtcdKey, value []byte, _ bool) erro etcd.CDCKeyTypeTaskPosition, etcd.CDCKeyTypeTaskStatus, etcd.CDCKeyTypeTaskWorkload: - changefeedState, exist := s.Changefeeds[k.ChangefeedID] + id := model.ChangeFeedID{ + Namespace: k.Namespace, + ID: k.ChangefeedID, + } + changefeedState, exist := s.Changefeeds[id] if !exist { if value == nil { return nil } - changefeedState = NewChangefeedReactorState(k.ChangefeedID) - s.Changefeeds[k.ChangefeedID] = changefeedState + changefeedState = NewChangefeedReactorState(id) + s.Changefeeds[id] = changefeedState } if err := changefeedState.UpdateCDCKey(k, value); err != nil { return errors.Trace(err) } if value == nil && !changefeedState.Exist() { s.pendingPatches = append(s.pendingPatches, changefeedState.getPatches()) - delete(s.Changefeeds, k.ChangefeedID) + delete(s.Changefeeds, id) } + case etcd.CDCKeyTypeUpstream: + if value == nil { + log.Info("upstream is deleted", + zap.String("upstreamID", k.UpstreamClusterID), + zap.Any("info", 
s.Upstreams[k.UpstreamClusterID])) + delete(s.Upstreams, k.UpstreamClusterID) + return nil + } + + var newUpstreamInfo model.UpstreamInfo + err := newUpstreamInfo.Unmarshal(value) + if err != nil { + return cerrors.ErrUnmarshalFailed.Wrap(err).GenWithStackByArgs() + } + + log.Info("new upstream online", + zap.String("upstreamID", k.UpstreamClusterID), zap.Any("info", newUpstreamInfo)) + //if s.onCaptureAdded != nil { + // s.onCaptureAdded(k.CaptureID, newCaptureInfo.AdvertiseAddr) + //} + s.Upstreams[k.UpstreamClusterID] = &newUpstreamInfo default: log.Warn("receive an unexpected etcd event", zap.String("key", key.String()), zap.ByteString("value", value)) } @@ -175,7 +202,7 @@ func (s *ChangefeedReactorState) UpdateCDCKey(key *etcd.CDCKey, value []byte) er var e interface{} switch key.Tp { case etcd.CDCKeyTypeChangefeedInfo: - if key.ChangefeedID != s.ID { + if key.ChangefeedID != s.ID.ID || key.Namespace != s.ID.Namespace { return nil } if value == nil { @@ -185,7 +212,7 @@ func (s *ChangefeedReactorState) UpdateCDCKey(key *etcd.CDCKey, value []byte) er s.Info = new(model.ChangeFeedInfo) e = s.Info case etcd.CDCKeyTypeChangeFeedStatus: - if key.ChangefeedID != s.ID { + if key.ChangefeedID != s.ID.ID || key.Namespace != s.ID.Namespace { return nil } if value == nil { @@ -195,7 +222,7 @@ func (s *ChangefeedReactorState) UpdateCDCKey(key *etcd.CDCKey, value []byte) er s.Status = new(model.ChangeFeedStatus) e = s.Status case etcd.CDCKeyTypeTaskPosition: - if key.ChangefeedID != s.ID { + if key.ChangefeedID != s.ID.ID || key.Namespace != s.ID.Namespace { return nil } if value == nil { @@ -206,7 +233,7 @@ func (s *ChangefeedReactorState) UpdateCDCKey(key *etcd.CDCKey, value []byte) er s.TaskPositions[key.CaptureID] = position e = position case etcd.CDCKeyTypeTaskStatus: - if key.ChangefeedID != s.ID { + if key.ChangefeedID != s.ID.ID || key.Namespace != s.ID.Namespace { return nil } if value == nil { @@ -217,7 +244,7 @@ func (s *ChangefeedReactorState) UpdateCDCKey(key *etcd.CDCKey, value []byte) er s.TaskStatuses[key.CaptureID] = status e = status case etcd.CDCKeyTypeTaskWorkload: - if key.ChangefeedID != s.ID { + if key.ChangefeedID != s.ID.ID || key.Namespace != s.ID.Namespace { return nil } if value == nil { @@ -313,7 +340,8 @@ func (s *ChangefeedReactorState) CheckChangefeedNormal() { func (s *ChangefeedReactorState) PatchInfo(fn func(*model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error)) { key := &etcd.CDCKey{ Tp: etcd.CDCKeyTypeChangefeedInfo, - ChangefeedID: s.ID, + ChangefeedID: s.ID.ID, + Namespace: s.ID.Namespace, } s.patchAny(key.String(), changefeedInfoTPI, func(e interface{}) (interface{}, bool, error) { // e == nil means that the key is not exist before this patch @@ -328,7 +356,8 @@ func (s *ChangefeedReactorState) PatchInfo(fn func(*model.ChangeFeedInfo) (*mode func (s *ChangefeedReactorState) PatchStatus(fn func(*model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error)) { key := &etcd.CDCKey{ Tp: etcd.CDCKeyTypeChangeFeedStatus, - ChangefeedID: s.ID, + ChangefeedID: s.ID.ID, + Namespace: s.ID.Namespace, } s.patchAny(key.String(), changefeedStatusTPI, func(e interface{}) (interface{}, bool, error) { // e == nil means that the key is not exist before this patch @@ -344,7 +373,8 @@ func (s *ChangefeedReactorState) PatchTaskPosition(captureID model.CaptureID, fn key := &etcd.CDCKey{ Tp: etcd.CDCKeyTypeTaskPosition, CaptureID: captureID, - ChangefeedID: s.ID, + ChangefeedID: s.ID.ID, + Namespace: s.ID.Namespace, } s.patchAny(key.String(), taskPositionTPI, func(e 
interface{}) (interface{}, bool, error) { // e == nil means that the key is not exist before this patch @@ -360,7 +390,8 @@ func (s *ChangefeedReactorState) PatchTaskStatus(captureID model.CaptureID, fn f key := &etcd.CDCKey{ Tp: etcd.CDCKeyTypeTaskStatus, CaptureID: captureID, - ChangefeedID: s.ID, + ChangefeedID: s.ID.ID, + Namespace: s.ID.Namespace, } s.patchAny(key.String(), taskStatusTPI, func(e interface{}) (interface{}, bool, error) { // e == nil means that the key is not exist before this patch @@ -376,7 +407,8 @@ func (s *ChangefeedReactorState) PatchTaskWorkload(captureID model.CaptureID, fn key := &etcd.CDCKey{ Tp: etcd.CDCKeyTypeTaskWorkload, CaptureID: captureID, - ChangefeedID: s.ID, + ChangefeedID: s.ID.ID, + Namespace: s.ID.Namespace, } s.patchAny(key.String(), taskWorkloadTPI, func(e interface{}) (interface{}, bool, error) { // e == nil means that the key is not exist before this patch diff --git a/pkg/txnutil/gc/gc_manager.go b/pkg/txnutil/gc/gc_manager.go index 3fb5d359a7a..b266fee255b 100644 --- a/pkg/txnutil/gc/gc_manager.go +++ b/pkg/txnutil/gc/gc_manager.go @@ -22,8 +22,8 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tiflow/cdc/model" "github.com/pingcap/tiflow/pkg/config" - cdcContext "github.com/pingcap/tiflow/pkg/context" cerror "github.com/pingcap/tiflow/pkg/errors" + "github.com/pingcap/tiflow/pkg/pdtime" "github.com/tikv/client-go/v2/oracle" pd "github.com/tikv/pd/client" "go.uber.org/zap" @@ -42,12 +42,13 @@ type Manager interface { // TryUpdateGCSafePoint tries to update TiCDC service GC safepoint. // Manager may skip update when it thinks it is too frequent. // Set `forceUpdate` to force Manager update. - TryUpdateGCSafePoint(ctx context.Context, checkpointTs model.Ts, forceUpdate bool) error + TryUpdateGCSafePoint(ctx context.Context, serviceID string, checkpointTs model.Ts, forceUpdate bool) error CheckStaleCheckpointTs(ctx context.Context, changefeedID model.ChangeFeedID, checkpointTs model.Ts) error } type gcManager struct { pdClient pd.Client + pdClock pdtime.Clock gcTTL int64 lastUpdatedTime time.Time @@ -57,20 +58,21 @@ type gcManager struct { } // NewManager creates a new Manager. 
-func NewManager(pdClient pd.Client) Manager { +func NewManager(pdClient pd.Client, pdClock pdtime.Clock) Manager { serverConfig := config.GetGlobalServerConfig() failpoint.Inject("InjectGcSafepointUpdateInterval", func(val failpoint.Value) { gcSafepointUpdateInterval = time.Duration(val.(int) * int(time.Millisecond)) }) return &gcManager{ pdClient: pdClient, + pdClock: pdClock, lastSucceededTime: time.Now(), gcTTL: serverConfig.GcTTL, } } func (m *gcManager) TryUpdateGCSafePoint( - ctx context.Context, checkpointTs model.Ts, forceUpdate bool, + ctx context.Context, serviceID string, checkpointTs model.Ts, forceUpdate bool, ) error { if time.Since(m.lastUpdatedTime) < gcSafepointUpdateInterval && !forceUpdate { return nil @@ -78,7 +80,7 @@ func (m *gcManager) TryUpdateGCSafePoint( m.lastUpdatedTime = time.Now() actual, err := setServiceGCSafepoint( - ctx, m.pdClient, CDCServiceSafePointID, m.gcTTL, checkpointTs) + ctx, m.pdClient, serviceID, m.gcTTL, checkpointTs) if err != nil { log.Warn("updateGCSafePoint failed", zap.Uint64("safePointTs", checkpointTs), @@ -111,11 +113,7 @@ func (m *gcManager) CheckStaleCheckpointTs( ) error { gcSafepointUpperBound := checkpointTs - 1 if m.isTiCDCBlockGC { - cctx, ok := ctx.(cdcContext.Context) - if !ok { - return cerror.ErrOwnerUnknown.GenWithStack("ctx not an cdcContext.Context, it should be") - } - pdTime, err := cctx.GlobalVars().PDClock.CurrentTime() + pdTime, err := m.pdClock.CurrentTime() // TODO: should we return err here, or just log it? if err != nil { return errors.Trace(err) diff --git a/pkg/txnutil/gc/gc_service.go b/pkg/txnutil/gc/gc_service.go index 85e21d91909..3e25a4ea4b3 100644 --- a/pkg/txnutil/gc/gc_service.go +++ b/pkg/txnutil/gc/gc_service.go @@ -19,6 +19,8 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/pingcap/tiflow/cdc/model" + "github.com/pingcap/tiflow/pkg/config" cerrors "github.com/pingcap/tiflow/pkg/errors" "github.com/pingcap/tiflow/pkg/retry" pd "github.com/tikv/pd/client" @@ -33,10 +35,10 @@ const ( // EnsureChangefeedStartTsSafety checks if the startTs less than the minimum of // service GC safepoint and this function will update the service GC to startTs func EnsureChangefeedStartTsSafety( - ctx context.Context, pdCli pd.Client, changefeedID string, TTL int64, startTs uint64, + ctx context.Context, pdCli pd.Client, changefeedID model.ChangeFeedID, TTL int64, startTs uint64, ) error { minServiceGCTs, err := setServiceGCSafepoint( - ctx, pdCli, cdcChangefeedCreatingServiceGCSafePointID+changefeedID, TTL, startTs) + ctx, pdCli, cdcChangefeedCreatingServiceGCSafePointID+config.GetGlobalServerConfig().ClusterID+"-"+changefeedID.String(), TTL, startTs) if err != nil { return errors.Trace(err) } diff --git a/pkg/txnutil/lock_resolver.go b/pkg/txnutil/lock_resolver.go index c7bf51c8f28..79ec0fd180f 100644 --- a/pkg/txnutil/lock_resolver.go +++ b/pkg/txnutil/lock_resolver.go @@ -135,7 +135,7 @@ func (r *resolver) Resolve(ctx context.Context, regionID uint64, maxVersion uint zap.Uint64("regionID", regionID), zap.Int("lockCount", lockCount), zap.Uint64("maxVersion", maxVersion), - zap.String("changefeed", r.changefeed), + zap.String("changefeed", r.changefeed.String()), zap.Any("role", r.role)) return nil } diff --git a/pkg/upstream/manager.go b/pkg/upstream/manager.go new file mode 100644 index 00000000000..ac960a71188 --- /dev/null +++ b/pkg/upstream/manager.go @@ -0,0 +1,112 @@ +// Copyright 2022 PingCAP, Inc. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package upstream
+
+import (
+    "context"
+    "strings"
+    "sync"
+
+    "github.com/benbjohnson/clock"
+    "github.com/pingcap/errors"
+    "github.com/pingcap/log"
+    "github.com/pingcap/tiflow/cdc/model"
+    "github.com/pingcap/tiflow/pkg/config"
+    "go.uber.org/zap"
+)
+
+var UpStreamManager *Manager
+
+type Manager struct {
+    ups sync.Map
+    c   clock.Clock
+    ctx context.Context
+    lck sync.Mutex
+}
+
+// NewManager creates a new Manager.
+func NewManager(ctx context.Context) *Manager {
+    return &Manager{c: clock.New(), ctx: ctx}
+}
+
+func (m *Manager) TryInit(clusterID string, upstreamInfo *model.UpstreamInfo) error {
+    if up, ok := m.ups.Load(clusterID); ok {
+        up.(*UpStream).hold()
+        up.(*UpStream).clearIdealCount()
+        return nil
+    }
+    m.lck.Lock()
+    defer m.lck.Unlock()
+    if up, ok := m.ups.Load(clusterID); ok {
+        up.(*UpStream).hold()
+        up.(*UpStream).clearIdealCount()
+        return nil
+    }
+
+    // TODO: use changefeed's pd addr in the future
+    pdEndpoints := strings.Split(upstreamInfo.PD, ",")
+    securityConfig := &config.SecurityConfig{
+        CAPath:   upstreamInfo.CAPath,
+        CertPath: upstreamInfo.CertPath,
+        KeyPath:  upstreamInfo.KeyPath,
+    }
+    up := newUpStream(pdEndpoints, securityConfig)
+    up.hold()
+    // TODO: a later implementation needs to check the error returned by Init.
+    _ = up.Init(m.ctx)
+    m.ups.Store(clusterID, up)
+    return nil
+}
+
+// Get returns the upStream identified by clusterID, or an error if it does not exist.
+func (m *Manager) Get(clusterID string) (*UpStream, error) {
+    if up, ok := m.ups.Load(clusterID); ok {
+        up.(*UpStream).hold()
+        up.(*UpStream).clearIdealCount()
+        return up.(*UpStream), nil
+    }
+    return nil, errors.New("upstream is not found")
+}
+
+// Release releases an upStream by clusterID
+func (m *Manager) Release(clusterID string) {
+    if up, ok := m.ups.Load(clusterID); ok {
+        up.(*UpStream).unhold()
+    }
+}
+
+func (m *Manager) checkUpstreams() {
+    m.ups.Range(func(k, v interface{}) bool {
+        up := v.(*UpStream)
+        if !up.isHold() {
+            up.addIdealCount()
+        }
+        if up.shouldClose() {
+            up.close()
+            m.ups.Delete(k)
+        }
+        return true
+    })
+}
+
+func (m *Manager) closeUpstreams() {
+    m.ups.Range(func(k, v interface{}) bool {
+        id := k.(string)
+        up := v.(*UpStream)
+        up.close()
+        log.Info("upStream closed", zap.String("clusterID", id))
+        m.ups.Delete(id)
+        return true
+    })
+}
diff --git a/pkg/upstream/up_stream.go b/pkg/upstream/up_stream.go
new file mode 100644
index 00000000000..3b57a9bebae
--- /dev/null
+++ b/pkg/upstream/up_stream.go
@@ -0,0 +1,217 @@
+// Copyright 2022 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package upstream
+
+import (
+    "context"
+    "strings"
+    "sync"
+    "sync/atomic"
+    "time"
+
+    "github.com/pingcap/log"
+    tidbkv "github.com/pingcap/tidb/kv"
+    "github.com/pingcap/tiflow/cdc/kv"
+    "github.com/pingcap/tiflow/pkg/config"
+    "github.com/pingcap/tiflow/pkg/pdtime"
+    "github.com/pingcap/tiflow/pkg/txnutil/gc"
+    "github.com/pingcap/tiflow/pkg/version"
+    "github.com/tikv/client-go/v2/tikv"
+    pd "github.com/tikv/pd/client"
+    "go.uber.org/zap"
+    "google.golang.org/grpc"
+    "google.golang.org/grpc/backoff"
+)
+
+type status int
+
+const (
+    nonInit status = iota
+    initializing
+    initialized
+    closed
+
+    // 30 mins (180 idle checks)
+    idealTreshold = 180
+)
+
+type UpStream struct {
+    pdEndpoints    []string
+    securityConfig *config.SecurityConfig
+
+    PDClient    pd.Client
+    KVStorage   tidbkv.Storage
+    GrpcPool    kv.GrpcPool
+    RegionCache *tikv.RegionCache
+    PDClock     *pdtime.PDClock
+    GCManager   gc.Manager
+    // hc counts how many changefeeds hold this resource; when it drops to 0, the owner is responsible for closing the resource.
+    hc int32
+    // idealCount is increased while the upstream is idle; once it reaches idealTreshold, the upstream will be closed.
+    idealCount int32
+    // status records the lifecycle of this upStream: nonInit, initializing, initialized or closed.
+    status status
+}
+
+// close closes all resources
+func (up *UpStream) close() {
+    if up.status == closed {
+        return
+    }
+    if up.PDClient != nil {
+        up.PDClient.Close()
+    }
+
+    if up.KVStorage != nil {
+        err := up.KVStorage.Close()
+        if err != nil {
+            log.Warn("kv store close failed", zap.Error(err))
+        }
+    }
+
+    if up.GrpcPool != nil {
+        up.GrpcPool.Close()
+    }
+    if up.RegionCache != nil {
+        up.RegionCache.Close()
+    }
+    if up.PDClock != nil {
+        up.PDClock.Stop()
+    }
+    up.status = closed
+}
+
+func newUpStream(pdEndpoints []string, securityConfig *config.SecurityConfig) *UpStream {
+    return &UpStream{pdEndpoints: pdEndpoints, securityConfig: securityConfig, status: nonInit}
+}
+
+func (up *UpStream) IsInitialized() bool {
+    return up.status == initialized
+}
+
+func (up *UpStream) IsInitializing() bool {
+    return up.status == initializing
+}
+
+func (up *UpStream) IsColse() bool {
+    return up.status == closed
+}
+
+// The caller needs to periodically check the error chan and whether this upStream has finished initializing.
+// If an error occurs, the caller needs to call close to release any resources that may already have been initialized.
+func (up *UpStream) Init(ctx context.Context) chan error {
+    errCh := make(chan error)
+    if up.IsInitialized() || up.IsInitializing() {
+        return errCh
+    }
+    // TODO: switch to asynchronous initialization in the future.
+    up.init(ctx)
+    return errCh
+}
+
+func (up *UpStream) init(ctx context.Context) {
+    log.Info("init upstream")
+    var err error
+    wg := new(sync.WaitGroup)
+
+    grpcTLSOption, err := up.securityConfig.ToGRPCDialOption()
+    if err != nil {
+        log.Error("init upstream error", zap.Error(err))
+        return
+    }
+
+    pdClient, err := pd.NewClientWithContext(
+        ctx, up.pdEndpoints, up.securityConfig.PDSecurityOption(),
+        pd.WithGRPCDialOptions(
+            grpcTLSOption,
+            grpc.WithBlock(),
+            grpc.WithConnectParams(grpc.ConnectParams{
+                Backoff: backoff.Config{
+                    BaseDelay:  time.Second,
+                    Multiplier: 1.1,
+                    Jitter:     0.1,
+                    MaxDelay:   3 * time.Second,
+                },
+                MinConnectTimeout: 3 * time.Second,
+            }),
+        ))
+    if err != nil {
+        log.Error("init upstream error", zap.Error(err))
+        return
+    }
+    up.PDClient = pdClient
+
+    // To not block CDC server startup, we need to warn instead of error
+    // when TiKV is incompatible.
+    errorTiKVIncompatible := false
+    err = version.CheckClusterVersion(ctx, up.PDClient, up.pdEndpoints, up.securityConfig, errorTiKVIncompatible)
+    if err != nil {
+        log.Error("init upstream error", zap.Error(err))
+    }
+
+    kvStore, err := kv.CreateTiStore(strings.Join(up.pdEndpoints, ","), up.securityConfig)
+    if err != nil {
+        log.Error("init upstream error", zap.Error(err))
+        return
+    }
+    up.KVStorage = kvStore
+
+    up.GrpcPool = kv.NewGrpcPoolImpl(ctx, up.securityConfig)
+    up.RegionCache = tikv.NewRegionCache(up.PDClient)
+
+    up.PDClock, err = pdtime.NewClock(ctx, up.PDClient)
+    if err != nil {
+        log.Error("init upstream error", zap.Error(err))
+        return
+    }
+
+    wg.Add(1)
+    go func() {
+        defer wg.Done()
+        up.PDClock.Run(ctx)
+    }()
+    wg.Add(1)
+    go func() {
+        defer wg.Done()
+        up.GrpcPool.RecycleConn(ctx)
+    }()
+
+    up.GCManager = gc.NewManager(up.PDClient, up.PDClock)
+    log.Info("upStream gcManager created")
+    up.status = initialized
+}
+
+func (up *UpStream) hold() {
+    atomic.AddInt32(&up.hc, 1)
+}
+
+func (up *UpStream) unhold() {
+    atomic.AddInt32(&up.hc, -1)
+}
+
+func (up *UpStream) isHold() bool {
+    return atomic.LoadInt32(&up.hc) > 0
+}
+
+func (up *UpStream) addIdealCount() {
+    atomic.AddInt32(&up.idealCount, 1)
+}
+
+func (up *UpStream) clearIdealCount() {
+    atomic.StoreInt32(&up.idealCount, 0)
+}
+
+func (up *UpStream) shouldClose() bool {
+    return atomic.LoadInt32(&up.idealCount) >= idealTreshold
+}
diff --git a/pkg/util/tz.go b/pkg/util/tz.go
index 77d03e17306..f9c058fea48 100644
--- a/pkg/util/tz.go
+++ b/pkg/util/tz.go
@@ -35,7 +35,7 @@ func GetTimezone(name string) (tz *time.Location, err error) {
     return
 }
 
-func getTimezoneFromZonefile(zonefile string) (tz *time.Location, err error) {
+func GetTimezoneFromZonefile(zonefile string) (tz *time.Location, err error) {
     // the linked path of `/etc/localtime` sample:
     // MacOS: /var/db/timezone/zoneinfo/Asia/Shanghai
     // Linux: /usr/share/zoneinfo/Asia/Shanghai
@@ -58,5 +58,5 @@ func GetLocalTimezone() (*time.Location, error) {
         return time.Local, nil
     }
     str := timeutil.InferSystemTZ()
-    return getTimezoneFromZonefile(str)
+    return GetTimezoneFromZonefile(str)
 }
diff --git a/pkg/util/tz_test.go b/pkg/util/tz_test.go
index 31b07019690..cf0a5cd413d 100644
--- a/pkg/util/tz_test.go
+++ b/pkg/util/tz_test.go
@@ -32,7 +32,7 @@ func TestGetTimezoneFromZonefile(t *testing.T) {
         {false, "/usr/share/zoneinfo/Asia/Shanghai", "Asia/Shanghai"},
     }
     for _, tc := range testCases {
-        loc, err := getTimezoneFromZonefile(tc.zonefile)
+        loc, err := GetTimezoneFromZonefile(tc.zonefile)
         if tc.hasErr {
             require.NotNil(t, err)
         } else {
diff --git a/pkg/version/version.go b/pkg/version/version.go
index ba4d2f78e8a..157334efa09 100644
--- a/pkg/version/version.go
+++ b/pkg/version/version.go
@@ -24,7 +24,7 @@ import (
 
 // Version information.
var ( - ReleaseVersion = "None" + ReleaseVersion = "v5.4.0" BuildTS = "None" GitHash = "None" GitBranch = "None" diff --git a/tests/integration_tests/move_table/main.go b/tests/integration_tests/move_table/main.go index db811dc1196..bedbcaaec1d 100644 --- a/tests/integration_tests/move_table/main.go +++ b/tests/integration_tests/move_table/main.go @@ -208,7 +208,7 @@ func (c *cluster) moveAllTables(ctx context.Context, sourceCapture, targetCaptur } func (c *cluster) refreshInfo(ctx context.Context) error { - ownerID, err := c.cdcEtcdCli.GetOwnerID(ctx, etcd.CaptureOwnerKey) + ownerID, err := c.cdcEtcdCli.GetOwnerID(ctx, etcd.CaptureOwnerKey()) if err != nil { return errors.Trace(err) } diff --git a/tests/utils/cdc_state_checker/cdc_monitor.go b/tests/utils/cdc_state_checker/cdc_monitor.go index 0c3480c9498..3458df1908a 100644 --- a/tests/utils/cdc_state_checker/cdc_monitor.go +++ b/tests/utils/cdc_state_checker/cdc_monitor.go @@ -74,7 +74,7 @@ func newCDCMonitor(ctx context.Context, pd string, credential *security.Credenti wrappedCli := etcd.Wrap(etcdCli, map[string]prometheus.Counter{}) reactor := &cdcMonitReactor{} initState := newCDCReactorState() - etcdWorker, err := orchestrator.NewEtcdWorker(wrappedCli, etcd.EtcdKeyBase, reactor, initState) + etcdWorker, err := orchestrator.NewEtcdWorker(wrappedCli, etcd.EtcdKeyBase(), reactor, initState) if err != nil { return nil, errors.Trace(err) } diff --git a/tests/utils/cdc_state_checker/state.go b/tests/utils/cdc_state_checker/state.go index 77396a56a4c..904dee166c2 100644 --- a/tests/utils/cdc_state_checker/state.go +++ b/tests/utils/cdc_state_checker/state.go @@ -35,7 +35,7 @@ type cdcReactorState struct { } var ( - captureRegex = regexp.MustCompile(regexp.QuoteMeta(etcd.CaptureInfoKeyPrefix) + "/(.+)") + captureRegex = regexp.MustCompile(regexp.QuoteMeta(etcd.CaptureInfoKeyPrefix()) + "/(.+)") changefeedRegex = regexp.MustCompile(regexp.QuoteMeta(etcd.JobKeyPrefix) + "/(.+)") positionRegex = regexp.MustCompile(regexp.QuoteMeta(etcd.TaskPositionKeyPrefix) + "/(.+?)/(.+)") statusRegex = regexp.MustCompile(regexp.QuoteMeta(etcd.TaskStatusKeyPrefix) + "/(.+?)/(.+)") @@ -51,7 +51,7 @@ func newCDCReactorState() *cdcReactorState { } func (s *cdcReactorState) Update(key util.EtcdKey, value []byte, isInit bool) error { - if key.String() == etcd.CaptureOwnerKey { + if key.String() == etcd.CaptureOwnerKey() { if value == nil { log.Info("Owner lost", zap.String("oldOwner", s.Owner)) return nil