Spaces:
Runtime error
Runtime error
File size: 7,265 Bytes
ec93d6d 016d7aa 54c50a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
dfs_code = r"""
def dfs(visited, graph, node): #function for dfs
if node not in visited:
print (node)
visited.add(node)
for neighbour in graph[node]:
dfs(visited, graph, neighbour)
"""
function_code = r"""
def write_documents(self, documents: Union[List[dict], List[Document]], index: Optional[str] = None,
batch_size: int = 10_000, duplicate_documents: Optional[str] = None):
if index and not self.client.indices.exists(index=index):
self._create_document_index(index)
if index is None:
index = self.index
duplicate_documents = duplicate_documents or self.duplicate_documents
assert duplicate_documents in self.duplicate_documents_options,
f"duplicate_documents parameter must be {', '.join(self.duplicate_documents_options)}"
field_map = self._create_document_field_map()
document_objects = [Document.from_dict(d, field_map=field_map) if isinstance(d, dict) else d for d in documents]
document_objects = self._handle_duplicate_documents(documents=document_objects,
index=index,
duplicate_documents=duplicate_documents)
documents_to_index = []
for doc in document_objects:
_doc = {
"_op_type": "index" if duplicate_documents == 'overwrite' else "create",
"_index": index,
**doc.to_dict(field_map=self._create_document_field_map())
} # type: Dict[str, Any]
# cast embedding type as ES cannot deal with np.array
if _doc[self.embedding_field] is not None:
if type(_doc[self.embedding_field]) == np.ndarray:
_doc[self.embedding_field] = _doc[self.embedding_field].tolist()
# rename id for elastic
_doc["_id"] = str(_doc.pop("id"))
# don't index query score and empty fields
_ = _doc.pop("score", None)
_doc = {k:v for k,v in _doc.items() if v is not None}
# In order to have a flat structure in elastic + similar behaviour to the other DocumentStores,
# we "unnest" all value within "meta"
if "meta" in _doc.keys():
for k, v in _doc["meta"].items():
_doc[k] = v
_doc.pop("meta")
documents_to_index.append(_doc)
# Pass batch_size number of documents to bulk
if len(documents_to_index) % batch_size == 0:
bulk(self.client, documents_to_index, request_timeout=300, refresh=self.refresh_type)
documents_to_index = []
if documents_to_index:
bulk(self.client, documents_to_index, request_timeout=300, refresh=self.refresh_type)
"""
real_docstring = r"""
Indexes documents for later queries in Elasticsearch.
Behaviour if a document with the same ID already exists in ElasticSearch:
a) (Default) Throw Elastic's standard error message for duplicate IDs.
b) If `self.update_existing_documents=True` for DocumentStore: Overwrite existing documents.
(This is only relevant if you pass your own ID when initializing a `Document`.
If don't set custom IDs for your Documents or just pass a list of dictionaries here,
they will automatically get UUIDs assigned. See the `Document` class for details)
:param documents: a list of Python dictionaries or a list of Haystack Document objects.
For documents as dictionaries, the format is {"content": "<the-actual-text>"}.
Optionally: Include meta data via {"content": "<the-actual-text>",
"meta":{"name": "<some-document-name>, "author": "somebody", ...}}
It can be used for filtering and is accessible in the responses of the Finder.
Advanced: If you are using your own Elasticsearch mapping, the key names in the dictionary
should be changed to what you have set for self.content_field and self.name_field.
:param index: Elasticsearch index where the documents should be indexed. If not supplied, self.index will be used.
:param batch_size: Number of documents that are passed to Elasticsearch's bulk function at a time.
:param duplicate_documents: Handle duplicates document based on parameter options.
Parameter options : ( 'skip','overwrite','fail')
skip: Ignore the duplicates documents
overwrite: Update any existing documents with the same ID when adding documents.
fail: an error is raised if the document ID of the document being added already
exists.
:raises DuplicateDocumentError: Exception trigger on duplicate document
:return: None
"""
tree_code = r"""
class Tree:
def __init__(self):
self.val = None
self.left = None
self.right = None
"""
insert_code = r"""
def insert(self, val):
if self.val:
if val < self.val:
if self.left is None:
self.left = Tree(val)
else:
self.left.insert(val)
elif val > self.val:
if self.right is None:
self.right = Tree(val)
else:
self.right.insert(val)
else:
self.val = val
"""
display_code = r"""
def display_tree(self: Tree, prefix='value: '):
current_node = self.val
if self.left:
self.left.display_tree()
print(prefix, current_node)
if self.right:
self.right.display_tree()
"""
article_string = r"""CodeXGLLUE task definition (and dataset): **Code summarization (CodeSearchNet)**:
_A model is given the task to generate natural language comments for a programming language code input._
For further details, see the [CodeXGLUE](https://github.com/microsoft/CodeXGLUE) benchmark dataset and open challenge for code intelligence.
"""
descr_string = 'The application takes as input the python code for a function, or a class, and generates a documentation string, or code comment, for it using codeT5 fine tuned for code2text generation. Code to text generation, or code summarization, is a CodeXGLUE generation, or sequence to sequence, downstream task. CodeXGLUE stands for General Language Understanding Evaluation benchmark *for code*, which includes diversified code intelligence downstream inference tasks and datasets.'
def pygen_func(nl_code_intent):
pass # TODO: generate code PL from intent NL + search in corpus
# inputs = {'code_nl': code_nl}
# payload = json.dumps(inputs)
# prediction = req.request(CT5_METHOD, CT5_URL, data=payload)
# prediction = req.request(CT5_METHOD, CT5_URL, json=req_data)
# answer = json.loads(prediction.content.decode("utf-8"))
# return str(answer)
# CT5_URL = "https://api-inference.huggingface.co/models/nielsr/codet5-small-code-summarization-ruby"
|