{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extract questions from a chat export\n",
    "\n",
    "Reads the chat export `_chat.txt`, keeps every message that contains a `?`, and stores the result in the pickle file `questions`. After a manual curation pass (see *Curate the questions by hand*), the curated list in `remaining` is written to `questions.csv`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import re\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = \"_chat.txt\"\n",
    "# A new message starts with \"[dd.dd.dd, dd:dd:dd]\" (d = digit), followed by one\n",
    "# arbitrary character and everything up to and including the next colon.\n",
    "# Raw string: the backslashes belong to the regex, not to Python escapes.\n",
    "pattern = r\"^\\[\\d{2}\\.\\d{2}\\.\\d{2},\\s\\d{2}:\\d{2}:\\d{2}\\].[^:]*:\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parse the export into messages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_messages(path, prefix_pattern):\n",
    "    \"\"\"Return the list of message bodies found in the text file at `path`.\n",
    "\n",
    "    A line whose start matches `prefix_pattern` opens a new message: the\n",
    "    matched prefix is dropped and the rest of the line (trailing newline\n",
    "    included) is kept. Every other line is appended to the previous message,\n",
    "    so multi-line messages stay together. Lines that come before the first\n",
    "    matching line are skipped because there is no message to attach them to.\n",
    "    \"\"\"\n",
    "    prefix_re = re.compile(prefix_pattern)\n",
    "    parsed = []\n",
    "    # utf-8-sig drops a leading byte-order mark if there is one; with plain\n",
    "    # utf8 the mark would stay in front of the first line and keep it from\n",
    "    # matching the \"^\"-anchored pattern.\n",
    "    with open(path, \"r\", encoding=\"utf-8-sig\") as f:\n",
    "        for line in f:\n",
    "            match = prefix_re.match(line)\n",
    "            if match:\n",
    "                parsed.append(line[match.end():])\n",
    "            elif parsed:\n",
    "                parsed[-1] += line\n",
    "    return parsed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "messages = parse_messages(filename, pattern)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Keep the messages that contain a question mark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Newlines are removed so that every question is a single line of text.\n",
    "questions = [msg.replace(\"\\n\", \"\") for msg in messages if \"?\" in msg]\n",
    "len(questions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"questions\", \"wb\") as fp:\n",
    "    pickle.dump(questions, fp)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Curate the questions by hand\n",
    "\n",
    "This step is interactive (it blocks on `input()`), so it is kept as a snippet instead of a code cell; paste it into a code cell to run it (it needs `pickle` from the imports cell). For each question, type `s` to keep it, `m` to enter a replacement, or `d` (or anything else) to drop it. Interrupting the prompt ends the session: the questions not yet reviewed are saved back to `questions` and the kept ones to `remaining`, so the next session continues where this one stopped.\n",
    "\n",
    "```python\n",
    "# pickle.load can execute arbitrary code; only load files written by this notebook.\n",
    "with open(\"questions\", \"rb\") as fp:\n",
    "    questions = pickle.load(fp)\n",
    "try:\n",
    "    with open(\"remaining\", \"rb\") as fp2:\n",
    "        remaining = pickle.load(fp2)\n",
    "except FileNotFoundError:\n",
    "    remaining = []  # first session: nothing has been curated yet\n",
    "for idx, question in enumerate(questions):\n",
    "    try:\n",
    "        print(question)\n",
    "        command = input()\n",
    "        if command == \"d\":\n",
    "            continue\n",
    "        elif command == \"s\":\n",
    "            remaining.append(question)\n",
    "        elif command == \"m\":\n",
    "            modified = input(\"Enter new modified sentence\")\n",
    "            remaining.append(modified)\n",
    "    except (KeyboardInterrupt, EOFError):\n",
    "        print(f\"Canceled at question {idx}\")\n",
    "        questions = questions[idx:]\n",
    "        break\n",
    "else:\n",
    "    # Every question was reviewed, so nothing is pending for the next session.\n",
    "    questions = []\n",
    "with open(\"questions\", \"wb\") as fp:\n",
    "    pickle.dump(questions, fp)\n",
    "with open(\"remaining\", \"wb\") as fp:\n",
    "    pickle.dump(remaining, fp)\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export the curated questions to CSV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pickle.load can execute arbitrary code; only load files written by this notebook.\n",
    "with open(\"./remaining\", \"rb\") as fp:\n",
    "    remaining = pickle.load(fp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(remaining)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(data={\"question\": remaining})\n",
    "df.to_csv(\"./questions.csv\", sep=\",\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.9 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "999ea782e2d719ec62688e738a2ff20f2535cd73f1388dd13a2d835295a4fc1a"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}