diff --git a/svc/product_series_auto_scheduler.go b/svc/product_series_auto_scheduler.go index 3fa98f6..7b37060 100644 --- a/svc/product_series_auto_scheduler.go +++ b/svc/product_series_auto_scheduler.go @@ -1179,6 +1179,15 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri var id int64 err := pg.QueryRowContext(ctx, `SELECT dim_id FROM mk_dim_token_map WHERE dim_column=$1 AND token=$2`, column, tok).Scan(&id) if err == nil { + // mk_dim_token_map can become polluted by heuristic inference/persistence (especially for dimval1). + // If we can cross-check against item-scoped images, prefer the item-specific inference to avoid + // conflating different tokens onto the same dim_id. + if column == "dimval1" && mmitemID > 0 { + if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok && inferred > 0 && inferred != id { + log.Printf("[DimTokenConflict] column=dimval1 token=%s mmitem_id=%d mapped=%d inferred=%d", tok, mmitemID, id, inferred) + return inferred, true, nil + } + } return id, id > 0, nil } if err != sql.ErrNoRows { @@ -1186,12 +1195,20 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri } // Fallback: infer from dfblob filenames. - // For dimval3, prefer token map as the source of truth when present; use image inference only when missing. - if column == "dimval3" && mmitemID > 0 { - if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok { + // Prefer item-scoped inference when we have mmitem_id. We intentionally avoid persisting these + // inferred rows into mk_dim_token_map to prevent global pollution. + if mmitemID > 0 { + if inferred, ok := productSeriesInferDimIDFromImages(pg, mmitemID, column, tok); ok && inferred > 0 { return inferred, true, nil } } + + // As a last resort, do a global inference. This is intentionally disabled for dimval1 because it is + // too easy to mis-infer and conflate tokens across the whole catalog. + if column == "dimval1" { + return 0, false, nil + } + v := productSeriesResolveDimvalFromFileNameToken(pg, column, tok, 0) if v == "" { return 0, false, nil @@ -1200,15 +1217,6 @@ func productSeriesResolveDimTokenID(ctx context.Context, pg *sql.DB, column stri if perr != nil || parsed <= 0 { return 0, false, nil } - if column == "dimval1" { - // Persist only for dimval1 where tokens are globally stable. - _, _ = pg.ExecContext(ctx, ` -INSERT INTO mk_dim_token_map (dim_column, token, dim_id, updated_at) -VALUES ($1,$2,$3,now()) -ON CONFLICT (dim_column, token) -DO UPDATE SET dim_id = EXCLUDED.dim_id, updated_at = EXCLUDED.updated_at -`, column, tok, parsed) - } return parsed, true, nil }