Spaces:

retopara
/

ragflow

Build error

yqkcn

remove unused import (#2679)

3d5f9ea about 1 year ago

1.48 kB

	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	import re

	from api.db import LLMType
	from rag.nlp import rag_tokenizer
	from api.db.services.llm_service import LLMBundle
	from rag.nlp import tokenize


	def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
	    """Transcribe an audio file and return the transcript as a single chunk.

	    Args:
	        filename: Name of the audio file. It is stored under ``docnm_kwd``
	            and, with a trailing alphabetic extension removed, tokenized
	            into ``title_tks`` / ``title_sm_tks``.
	        binary: Raw audio payload handed to the speech-to-text model.
	        tenant_id: Tenant identifier used to construct the ``LLMBundle``.
	        lang: Language name. It is forwarded to the ``LLMBundle`` and
	            compared case-insensitively with ``"english"`` to derive the
	            flag passed to ``tokenize``.
	        callback: Optional progress reporter. It is invoked both
	            positionally (``callback(progress, message)``) and by keyword
	            (``callback(prog=..., msg=...)``). When omitted, progress
	            reports are discarded.
	        **kwargs: Accepted for interface compatibility; not used here.

	    Returns:
	        ``[doc]`` on success, where ``doc`` is the dict built here and then
	        handed to ``tokenize`` (presumably filled in place with the
	        transcript's content fields -- confirm against ``rag.nlp.tokenize``).
	        An empty list if transcription or tokenization raises; in that case
	        the error text is reported through ``callback`` with ``prog=-1``.
	    """
	    if callback is None:
	        # The signature advertises ``callback`` as optional, but every code
	        # path below calls it. Without a stand-in, omitting it raised
	        # TypeError in the try body and then again, uncaught, in the handler.
	        def callback(prog=None, msg=""):
	            return None

	    doc = {
	        "docnm_kwd": filename,
	        "title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)),
	    }
	    doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])

	    # The English flag is taken from the declared language, not detected
	    # from the transcript.
	    eng = lang.lower() == "english"
	    try:
	        callback(0.1, "USE Sequence2Txt LLM to transcription the audio")
	        seq2txt_mdl = LLMBundle(tenant_id, LLMType.SPEECH2TEXT, lang=lang)
	        ans = seq2txt_mdl.transcription(binary)
	        # Only the first 32 characters of the transcript are echoed.
	        callback(0.8, "Sequence2Txt LLM respond: %s ..." % ans[:32])
	        tokenize(doc, ans, eng)
	        return [doc]
	    except Exception as e:
	        # Best-effort boundary: report the failure to the caller's progress
	        # channel instead of propagating, then fall through to return [].
	        callback(prog=-1, msg=str(e))

	    return []