Merge remote-tracking branch 'origin/master'
This commit is contained in:
@@ -19,6 +19,7 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
@@ -48,12 +49,51 @@ var translationSourceTypeSet = map[string]struct{}{
|
||||
}
|
||||
|
||||
// Precompiled patterns used while harvesting translatable UI strings from
// front-end sources. Each name is declared exactly once; they are compiled a
// single time at package initialisation.
var (
	// Generic text filters: a quoted literal of 3-120 characters, "contains an
	// ASCII or Turkish letter", "is exactly an HTTP verb or a JS literal
	// keyword", and "runs of characters outside [a-z0-9_]".
	reQuotedText = regexp.MustCompile(`['"]([^'"]{3,120})['"]`)
	reHasLetter  = regexp.MustCompile(`[A-Za-zÇĞİÖŞÜçğıöşü]`)
	reBadText    = regexp.MustCompile(`^(GET|POST|PUT|DELETE|OPTIONS|true|false|null|undefined)$`)
	reKeyUnsafe  = regexp.MustCompile(`[^a-z0-9_]+`)

	// Vue single-file-component sections; group 1 is the inner content of the
	// leftmost <template> / <script> element (case-insensitive, dot matches
	// newlines, non-greedy).
	reVueTemplate = regexp.MustCompile(`(?is)<template[^>]*>(.*?)</template>`)
	reVueScript   = regexp.MustCompile(`(?is)<script[^>]*>(.*?)</script>`)

	// Template-side captures: quoted values of text-bearing attributes, text
	// sitting between a '>' and the next '<', and a detector for mustache
	// interpolation delimiters.
	reTemplateAttr    = regexp.MustCompile(`\b(?:label|title|placeholder|aria-label|hint)\s*=\s*['"]([^'"]{2,180})['"]`)
	reTemplateText    = regexp.MustCompile(`>([^<]{3,180})<`)
	reTemplateDynamic = regexp.MustCompile(`[{][{]|[}][}]`)

	// Script-side captures: quoted values of object properties that carry UI
	// copy. reScriptLabelProp is the label-only subset of reScriptUIProp.
	reScriptLabelProp = regexp.MustCompile(`\blabel\s*:\s*['"]([^'"]{2,180})['"]`)
	reScriptUIProp    = regexp.MustCompile(`\b(?:label|message|title|placeholder|hint)\s*:\s*['"]([^'"]{2,180})['"]`)
)
|
||||
|
||||
// translationNoiseTokens is the set of lowercase tokens that
// extractVisibleUIText treats as component attribute names: a captured text
// made up only of these (or of directive-style tokens) is discarded instead of
// being reported as UI copy.
var translationNoiseTokens = func() map[string]struct{} {
	names := []string{
		"flat",
		"dense",
		"filled",
		"outlined",
		"borderless",
		"clearable",
		"loading",
		"disable",
		"readonly",
		"hide-bottom",
		"stack-label",
		"emit-value",
		"map-options",
		"use-input",
		"multiple",
		"options",
		"rows",
		"cols",
		"class",
		"style",
	}
	set := make(map[string]struct{}, len(names))
	for _, name := range names {
		set[name] = struct{}{}
	}
	return set
}()
|
||||
|
||||
// Directory allow-lists consulted by shouldCollectDummySeedFile. Entries are
// matched as prefixes of the lowercased, slash-separated path relative to the
// UI root.
var (
	// translationDummyAllowedVueDirs lists the prefixes accepted for ".vue" files.
	translationDummyAllowedVueDirs = []string{"pages/", "components/", "layouts/"}

	// translationDummyAllowedStoreDirs lists the prefixes accepted for ".js"
	// and ".ts" files.
	translationDummyAllowedStoreDirs = []string{"stores/"}
)
|
||||
|
||||
type TranslationUpdatePayload struct {
|
||||
SourceTextTR *string `json:"source_text_tr"`
|
||||
TranslatedText *string `json:"translated_text"`
|
||||
@@ -978,14 +1018,16 @@ func collectDummySeeds(limit int) []sourceSeed {
|
||||
if ext != ".vue" && ext != ".js" && ext != ".ts" {
|
||||
return nil
|
||||
}
|
||||
if !shouldCollectDummySeedFile(uiRoot, path, ext) {
|
||||
return nil
|
||||
}
|
||||
|
||||
b, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
matches := reQuotedText.FindAllStringSubmatch(string(b), -1)
|
||||
for _, m := range matches {
|
||||
text := strings.TrimSpace(m[1])
|
||||
texts := extractVisibleUIText(string(b), ext)
|
||||
for _, text := range texts {
|
||||
if !isCandidateText(text) {
|
||||
continue
|
||||
}
|
||||
@@ -1009,6 +1051,126 @@ func collectDummySeeds(limit int) []sourceSeed {
|
||||
return out
|
||||
}
|
||||
|
||||
func shouldCollectDummySeedFile(uiRoot, fullPath, ext string) bool {
|
||||
rel, err := filepath.Rel(uiRoot, fullPath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
rel = strings.ToLower(filepath.ToSlash(rel))
|
||||
if strings.Contains(rel, "/__tests__/") || strings.Contains(rel, "/tests/") || strings.Contains(rel, "/mock/") || strings.Contains(rel, "/mocks/") {
|
||||
return false
|
||||
}
|
||||
|
||||
if ext == ".vue" {
|
||||
for _, prefix := range translationDummyAllowedVueDirs {
|
||||
if strings.HasPrefix(rel, prefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
if ext == ".js" || ext == ".ts" {
|
||||
for _, prefix := range translationDummyAllowedStoreDirs {
|
||||
if strings.HasPrefix(rel, prefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func extractVisibleUIText(content string, ext string) []string {
|
||||
out := make([]string, 0, 32)
|
||||
seen := map[string]struct{}{}
|
||||
isLikelyAttrNoise := func(text string) bool {
|
||||
tokens := strings.Fields(strings.ToLower(text))
|
||||
if len(tokens) < 2 || len(tokens) > 16 {
|
||||
return false
|
||||
}
|
||||
matched := 0
|
||||
for _, t := range tokens {
|
||||
if _, ok := translationNoiseTokens[t]; ok {
|
||||
matched++
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(t, ":") || strings.HasPrefix(t, "@") || strings.HasPrefix(t, "v-") || strings.HasPrefix(t, "#") {
|
||||
matched++
|
||||
continue
|
||||
}
|
||||
}
|
||||
return matched == len(tokens)
|
||||
}
|
||||
appendText := func(raw string) {
|
||||
if strings.ContainsAny(raw, "\r\n\t") {
|
||||
return
|
||||
}
|
||||
text := strings.TrimSpace(strings.Join(strings.Fields(raw), " "))
|
||||
if text == "" {
|
||||
return
|
||||
}
|
||||
if strings.ContainsAny(text, "<>{}[]`") {
|
||||
return
|
||||
}
|
||||
if strings.Contains(text, "=") || strings.Contains(text, "#") {
|
||||
return
|
||||
}
|
||||
if reTemplateDynamic.MatchString(text) {
|
||||
return
|
||||
}
|
||||
if isLikelyAttrNoise(text) {
|
||||
return
|
||||
}
|
||||
if _, ok := seen[text]; ok {
|
||||
return
|
||||
}
|
||||
seen[text] = struct{}{}
|
||||
out = append(out, text)
|
||||
}
|
||||
|
||||
switch ext {
|
||||
case ".vue":
|
||||
template := content
|
||||
if m := reVueTemplate.FindStringSubmatch(content); len(m) > 1 {
|
||||
template = m[1]
|
||||
}
|
||||
for _, m := range reTemplateAttr.FindAllStringSubmatch(template, -1) {
|
||||
if len(m) > 1 {
|
||||
appendText(m[1])
|
||||
}
|
||||
}
|
||||
for _, m := range reTemplateText.FindAllStringSubmatch(template, -1) {
|
||||
if len(m) > 1 {
|
||||
appendText(m[1])
|
||||
}
|
||||
}
|
||||
script := content
|
||||
if m := reVueScript.FindStringSubmatch(content); len(m) > 1 {
|
||||
script = m[1]
|
||||
}
|
||||
for _, m := range reScriptLabelProp.FindAllStringSubmatch(script, -1) {
|
||||
if len(m) > 1 {
|
||||
appendText(m[1])
|
||||
}
|
||||
}
|
||||
for _, m := range reScriptUIProp.FindAllStringSubmatch(script, -1) {
|
||||
if len(m) > 1 {
|
||||
appendText(m[1])
|
||||
}
|
||||
}
|
||||
case ".js", ".ts":
|
||||
for _, m := range reScriptUIProp.FindAllStringSubmatch(content, -1) {
|
||||
if len(m) > 1 {
|
||||
appendText(m[1])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// autoTranslatePendingRows forwards db, langs and limit to
// autoTranslatePendingRowsForKeys, passing nil and "" for that function's two
// remaining arguments, and returns the delegate's result unchanged.
func autoTranslatePendingRows(db *sql.DB, langs []string, limit int) (int, error) {
	return autoTranslatePendingRowsForKeys(db, langs, limit, nil, "")
}
|
||||
@@ -1091,6 +1253,10 @@ LIMIT $2
|
||||
failedTranslate := 0
|
||||
failedUpdate := 0
|
||||
doneByLang := map[string]int{}
|
||||
var processedCount int64
|
||||
var translatedCount int64
|
||||
var failedTranslateCount int64
|
||||
var failedUpdateCount int64
|
||||
progressEvery := parsePositiveIntEnv("TRANSLATION_AUTO_PROGRESS_EVERY", 100)
|
||||
if progressEvery <= 0 {
|
||||
progressEvery = 100
|
||||
@@ -1101,11 +1267,47 @@ LIMIT $2
|
||||
}
|
||||
progressTicker := time.Duration(progressSec) * time.Second
|
||||
lastProgress := time.Now()
|
||||
heartbeatDone := make(chan struct{})
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(progressTicker)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
processed := int(atomic.LoadInt64(&processedCount))
|
||||
translated := int(atomic.LoadInt64(&translatedCount))
|
||||
failedTr := int(atomic.LoadInt64(&failedTranslateCount))
|
||||
failedUpd := int(atomic.LoadInt64(&failedUpdateCount))
|
||||
elapsed := time.Since(start)
|
||||
rps := float64(translated)
|
||||
if elapsed > 0 {
|
||||
rps = float64(translated) / elapsed.Seconds()
|
||||
}
|
||||
log.Printf(
|
||||
"[TranslationAuto] trace=%s stage=heartbeat processed=%d/%d translated=%d failed_translate=%d failed_update=%d elapsed_ms=%d rps=%.2f",
|
||||
traceID,
|
||||
processed,
|
||||
len(list),
|
||||
translated,
|
||||
failedTr,
|
||||
failedUpd,
|
||||
elapsed.Milliseconds(),
|
||||
rps,
|
||||
)
|
||||
case <-heartbeatDone:
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
defer close(heartbeatDone)
|
||||
|
||||
for i, p := range list {
|
||||
tr, err := callAzureTranslate(p.Text, p.Lang)
|
||||
if err != nil || strings.TrimSpace(tr) == "" {
|
||||
failedTranslate++
|
||||
atomic.StoreInt64(&failedTranslateCount, int64(failedTranslate))
|
||||
atomic.StoreInt64(&processedCount, int64(i+1))
|
||||
continue
|
||||
}
|
||||
_, err = db.Exec(`
|
||||
@@ -1119,9 +1321,13 @@ WHERE id = $1
|
||||
`, p.ID, strings.TrimSpace(tr))
|
||||
if err != nil {
|
||||
failedUpdate++
|
||||
atomic.StoreInt64(&failedUpdateCount, int64(failedUpdate))
|
||||
atomic.StoreInt64(&processedCount, int64(i+1))
|
||||
continue
|
||||
}
|
||||
done++
|
||||
atomic.StoreInt64(&translatedCount, int64(done))
|
||||
atomic.StoreInt64(&processedCount, int64(i+1))
|
||||
doneByLang[p.Lang]++
|
||||
|
||||
processed := i + 1
|
||||
@@ -1594,6 +1800,9 @@ func isCandidateText(s string) bool {
|
||||
if strings.Contains(s, "/api/") {
|
||||
return false
|
||||
}
|
||||
if strings.ContainsAny(s, "{}[];`") {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user