def convert_to_list(text, text_list):
    """Split an unspaced string into known words, separated by single spaces.

    The text is scanned left to right. At each position the longest entry of
    ``text_list`` that the text starts with at that position is taken as a
    token. Characters that begin no known word are collected into a run, and
    each run is emitted as one token.

    Note: despite its name, this function returns a string, not a list.

    Args:
        text: The string to segment. A falsy value (``''`` or ``None``)
            yields ``''``.
        text_list: Iterable of candidate words. Empty entries are ignored.

    Returns:
        The tokens (matched words and unmatched runs, in their original
        order) joined with a single space.
    """
    if not text:
        return ''

    # Longest words first, so the first hit at a position is the longest one.
    # Empty entries are dropped: '' is a prefix of every string and consumes
    # no input, so matching it would keep the scan from ever advancing.
    vocabulary = sorted(
        (word for word in text_list if word), key=len, reverse=True
    )

    tokens = []
    pending = []  # characters seen since the last match that fit no word
    pos = 0
    end = len(text)

    # Walk the text with a cursor instead of re-slicing it on every step.
    while pos < end:
        hit = next(
            (word for word in vocabulary if text.startswith(word, pos)), None
        )
        if hit is None:
            pending.append(text[pos])
            pos += 1
            continue

        # Flush the unmatched run so it stays ahead of the matched word.
        if pending:
            tokens.append(''.join(pending))
            pending.clear()
        tokens.append(hit)
        pos += len(hit)

    # Trailing characters that never matched form the final token.
    if pending:
        tokens.append(''.join(pending))

    return ' '.join(tokens)
"python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 5 }