stmnk committed on
Commit
6d966ab
·
1 Parent(s): 016d7aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -142
app.py CHANGED
@@ -12,143 +12,9 @@ def query(payload):
12
  response = req.post(API_URL, headers=headers, json=payload)
13
  return response.json()
14
 
15
- function_code = r"""
16
- def write_documents(self, documents: Union[List[dict], List[Document]], index: Optional[str] = None,
17
- batch_size: int = 10_000, duplicate_documents: Optional[str] = None):
18
-
19
- if index and not self.client.indices.exists(index=index):
20
- self._create_document_index(index)
21
-
22
- if index is None:
23
- index = self.index
24
- duplicate_documents = duplicate_documents or self.duplicate_documents
25
- assert duplicate_documents in self.duplicate_documents_options,
26
- f"duplicate_documents parameter must be {', '.join(self.duplicate_documents_options)}"
27
-
28
- field_map = self._create_document_field_map()
29
- document_objects = [Document.from_dict(d, field_map=field_map) if isinstance(d, dict) else d for d in documents]
30
- document_objects = self._handle_duplicate_documents(documents=document_objects,
31
- index=index,
32
- duplicate_documents=duplicate_documents)
33
- documents_to_index = []
34
- for doc in document_objects:
35
- _doc = {
36
- "_op_type": "index" if duplicate_documents == 'overwrite' else "create",
37
- "_index": index,
38
- **doc.to_dict(field_map=self._create_document_field_map())
39
- } # type: Dict[str, Any]
40
-
41
- # cast embedding type as ES cannot deal with np.array
42
- if _doc[self.embedding_field] is not None:
43
- if type(_doc[self.embedding_field]) == np.ndarray:
44
- _doc[self.embedding_field] = _doc[self.embedding_field].tolist()
45
-
46
- # rename id for elastic
47
- _doc["_id"] = str(_doc.pop("id"))
48
-
49
- # don't index query score and empty fields
50
- _ = _doc.pop("score", None)
51
- _doc = {k:v for k,v in _doc.items() if v is not None}
52
-
53
- # In order to have a flat structure in elastic + similar behaviour to the other DocumentStores,
54
- # we "unnest" all value within "meta"
55
- if "meta" in _doc.keys():
56
- for k, v in _doc["meta"].items():
57
- _doc[k] = v
58
- _doc.pop("meta")
59
- documents_to_index.append(_doc)
60
-
61
- # Pass batch_size number of documents to bulk
62
- if len(documents_to_index) % batch_size == 0:
63
- bulk(self.client, documents_to_index, request_timeout=300, refresh=self.refresh_type)
64
- documents_to_index = []
65
-
66
- if documents_to_index:
67
- bulk(self.client, documents_to_index, request_timeout=300, refresh=self.refresh_type)
68
-
69
- """
70
-
71
  task_code = f' Summarize Python: {function_code}'
72
  # task_code = f' Summarize Python: {dfs_code}'
73
 
74
- real_docstring = r"""
75
- Indexes documents for later queries in Elasticsearch.
76
-
77
- Behaviour if a document with the same ID already exists in ElasticSearch:
78
- a) (Default) Throw Elastic's standard error message for duplicate IDs.
79
- b) If `self.update_existing_documents=True` for DocumentStore: Overwrite existing documents.
80
- (This is only relevant if you pass your own ID when initializing a `Document`.
81
- If you don't set custom IDs for your Documents or just pass a list of dictionaries here,
82
- they will automatically get UUIDs assigned. See the `Document` class for details)
83
-
84
- :param documents: a list of Python dictionaries or a list of Haystack Document objects.
85
- For documents as dictionaries, the format is {"content": "<the-actual-text>"}.
86
- Optionally: Include meta data via {"content": "<the-actual-text>",
87
- "meta":{"name": "<some-document-name>, "author": "somebody", ...}}
88
- It can be used for filtering and is accessible in the responses of the Finder.
89
- Advanced: If you are using your own Elasticsearch mapping, the key names in the dictionary
90
- should be changed to what you have set for self.content_field and self.name_field.
91
- :param index: Elasticsearch index where the documents should be indexed. If not supplied, self.index will be used.
92
- :param batch_size: Number of documents that are passed to Elasticsearch's bulk function at a time.
93
- :param duplicate_documents: Handle duplicate documents based on parameter options.
94
- Parameter options : ( 'skip','overwrite','fail')
95
- skip: Ignore the duplicate documents
96
- overwrite: Update any existing documents with the same ID when adding documents.
97
- fail: an error is raised if the document ID of the document being added already
98
- exists.
99
- :raises DuplicateDocumentError: Exception triggered on duplicate document
100
- :return: None
101
- """
102
-
103
- tree_code = r"""
104
- class Tree:
105
- def __init__(self):
106
- self.val = None
107
- self.left = None
108
- self.right = None
109
- """
110
-
111
- insert_code = r"""
112
- def insert(self, val):
113
- if self.val:
114
- if val < self.val:
115
- if self.left is None:
116
- self.left = Tree(val)
117
- else:
118
- self.left.insert(val)
119
- elif val > self.val:
120
- if self.right is None:
121
- self.right = Tree(val)
122
- else:
123
- self.right.insert(val)
124
- else:
125
- self.val = val
126
- """
127
-
128
- display_code = r"""
129
- def display_tree(self: Tree, prefix='value: '):
130
- current_node = self.val
131
-
132
- if self.left:
133
- self.left.display_tree()
134
-
135
- print(prefix, current_node)
136
-
137
- if self.right:
138
- self.right.display_tree()
139
-
140
- """
141
-
142
- def pygen_func(nl_code_intent):
143
- pass # TODO: generate code PL from intent NL + search in corpus
144
- # inputs = {'code_nl': code_nl}
145
- # payload = json.dumps(inputs)
146
- # prediction = req.request(CT5_METHOD, CT5_URL, data=payload)
147
- # prediction = req.request(CT5_METHOD, CT5_URL, json=req_data)
148
- # answer = json.loads(prediction.content.decode("utf-8"))
149
- # return str(answer)
150
- # CT5_URL = "https://api-inference.huggingface.co/models/nielsr/codet5-small-code-summarization-ruby"
151
-
152
  def docgen_func(function_code, min_length, max_length, top_k, top_p, temp, repetition_penalty):
153
  m, M, k, p, t, r = int(min_length), int(max_length), int(top_k), float(top_p/100), float(temp), float(repetition_penalty)
154
  req_data = {
@@ -181,14 +47,6 @@ def docgen_func(function_code, min_length, max_length, top_k, top_p, temp, repet
181
  return msg + 'Please wait for the model to load and try again'
182
  return str(output)
183
 
184
- article_string = r"""CodeXGLUE task definition (and dataset): **Code summarization (CodeSearchNet)**:
185
-
186
- _A model is given the task to generate natural language comments for a programming language code input._
187
-
188
- For further details, see the [CodeXGLUE](https://github.com/microsoft/CodeXGLUE) benchmark dataset and open challenge for code intelligence.
189
- """
190
- descr_string = 'The application takes as input the python code for a function, or a class, and generates a documentation string, or code comment, for it using codeT5 fine tuned for code2text generation. Code to text generation, or code summarization, is a CodeXGLUE generation, or sequence to sequence, downstream task. CodeXGLUE stands for General Language Understanding Evaluation benchmark *for code*, which includes diversified code intelligence downstream inference tasks and datasets.'
191
-
192
  iface = gr.Interface(
193
  # pygen_func,
194
  docgen_func,
 
12
  response = req.post(API_URL, headers=headers, json=payload)
13
  return response.json()
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  task_code = f' Summarize Python: {function_code}'
16
  # task_code = f' Summarize Python: {dfs_code}'
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def docgen_func(function_code, min_length, max_length, top_k, top_p, temp, repetition_penalty):
19
  m, M, k, p, t, r = int(min_length), int(max_length), int(top_k), float(top_p/100), float(temp), float(repetition_penalty)
20
  req_data = {
 
47
  return msg + 'Please wait for the model to load and try again'
48
  return str(output)
49
 
 
 
 
 
 
 
 
 
50
  iface = gr.Interface(
51
  # pygen_func,
52
  docgen_func,