stmnk committed on
Commit 016d7aa · 1 Parent(s): 46b560f

Update strings.py

Files changed (1)
  1. strings.py +144 -1
strings.py CHANGED
@@ -1,3 +1,13 @@
+def pygen_func(nl_code_intent):
+    pass # TODO: generate code PL from intent NL + search in corpus
+    # inputs = {'code_nl': code_nl}
+    # payload = json.dumps(inputs)
+    # prediction = req.request(CT5_METHOD, CT5_URL, data=payload)
+    # prediction = req.request(CT5_METHOD, CT5_URL, json=req_data)
+    # answer = json.loads(prediction.content.decode("utf-8"))
+    # return str(answer)
+    # CT5_URL = "https://api-inference.huggingface.co/models/nielsr/codet5-small-code-summarization-ruby"
+
 dfs_code = r"""
 def dfs(visited, graph, node): #function for dfs
     if node not in visited:
@@ -5,4 +15,137 @@ def dfs(visited, graph, node): #function for dfs
         visited.add(node)
         for neighbour in graph[node]:
             dfs(visited, graph, neighbour)
-"""
+"""
+
+function_code = r"""
+def write_documents(self, documents: Union[List[dict], List[Document]], index: Optional[str] = None,
+                    batch_size: int = 10_000, duplicate_documents: Optional[str] = None):
+
+    if index and not self.client.indices.exists(index=index):
+        self._create_document_index(index)
+
+    if index is None:
+        index = self.index
+    duplicate_documents = duplicate_documents or self.duplicate_documents
+    assert duplicate_documents in self.duplicate_documents_options, \
+        f"duplicate_documents parameter must be {', '.join(self.duplicate_documents_options)}"
+
+    field_map = self._create_document_field_map()
+    document_objects = [Document.from_dict(d, field_map=field_map) if isinstance(d, dict) else d for d in documents]
+    document_objects = self._handle_duplicate_documents(documents=document_objects,
+                                                        index=index,
+                                                        duplicate_documents=duplicate_documents)
+    documents_to_index = []
+    for doc in document_objects:
+        _doc = {
+            "_op_type": "index" if duplicate_documents == 'overwrite' else "create",
+            "_index": index,
+            **doc.to_dict(field_map=self._create_document_field_map())
+        }  # type: Dict[str, Any]
+
+        # cast embedding type as ES cannot deal with np.array
+        if _doc[self.embedding_field] is not None:
+            if type(_doc[self.embedding_field]) == np.ndarray:
+                _doc[self.embedding_field] = _doc[self.embedding_field].tolist()
+
+        # rename id for elastic
+        _doc["_id"] = str(_doc.pop("id"))
+
+        # don't index query score and empty fields
+        _ = _doc.pop("score", None)
+        _doc = {k:v for k,v in _doc.items() if v is not None}
+
+        # In order to have a flat structure in elastic + similar behaviour to the other DocumentStores,
+        # we "unnest" all values within "meta"
+        if "meta" in _doc.keys():
+            for k, v in _doc["meta"].items():
+                _doc[k] = v
+            _doc.pop("meta")
+        documents_to_index.append(_doc)
+
+        # Pass batch_size number of documents to bulk
+        if len(documents_to_index) % batch_size == 0:
+            bulk(self.client, documents_to_index, request_timeout=300, refresh=self.refresh_type)
+            documents_to_index = []
+
+    if documents_to_index:
+        bulk(self.client, documents_to_index, request_timeout=300, refresh=self.refresh_type)
+
+"""
+
+real_docstring = r"""
+Indexes documents for later queries in Elasticsearch.
+
+Behaviour if a document with the same ID already exists in ElasticSearch:
+a) (Default) Throw Elastic's standard error message for duplicate IDs.
+b) If `self.update_existing_documents=True` for DocumentStore: Overwrite existing documents.
+(This is only relevant if you pass your own ID when initializing a `Document`.
+If you don't set custom IDs for your Documents or just pass a list of dictionaries here,
+they will automatically get UUIDs assigned. See the `Document` class for details.)
+
+:param documents: a list of Python dictionaries or a list of Haystack Document objects.
+                  For documents as dictionaries, the format is {"content": "<the-actual-text>"}.
+                  Optionally: Include meta data via {"content": "<the-actual-text>",
+                  "meta": {"name": "<some-document-name>", "author": "somebody", ...}}
+                  It can be used for filtering and is accessible in the responses of the Finder.
+                  Advanced: If you are using your own Elasticsearch mapping, the key names in the dictionary
+                  should be changed to what you have set for self.content_field and self.name_field.
+:param index: Elasticsearch index where the documents should be indexed. If not supplied, self.index will be used.
+:param batch_size: Number of documents that are passed to Elasticsearch's bulk function at a time.
+:param duplicate_documents: Handle duplicate documents based on parameter options.
+                            Parameter options: ('skip', 'overwrite', 'fail')
+                            skip: Ignore the duplicate documents.
+                            overwrite: Update any existing documents with the same ID when adding documents.
+                            fail: An error is raised if the document ID of the document being added already
+                            exists.
+:raises DuplicateDocumentError: Exception triggered on a duplicate document.
+:return: None
+"""
+
+tree_code = r"""
+class Tree:
+    def __init__(self, val=None):
+        self.val = val
+        self.left = None
+        self.right = None
+"""
+
+insert_code = r"""
+def insert(self, val):
+    if self.val:
+        if val < self.val:
+            if self.left is None:
+                self.left = Tree(val)
+            else:
+                self.left.insert(val)
+        elif val > self.val:
+            if self.right is None:
+                self.right = Tree(val)
+            else:
+                self.right.insert(val)
+    else:
+        self.val = val
+"""
+
+display_code = r"""
+def display_tree(self: Tree, prefix='value: '):
+    current_node = self.val
+
+    if self.left:
+        self.left.display_tree()
+
+    print(prefix, current_node)
+
+    if self.right:
+        self.right.display_tree()
+
+"""
+
+article_string = r"""CodeXGLUE task definition (and dataset): **Code summarization (CodeSearchNet)**:
+
+_A model is given the task to generate natural language comments for a programming language code input._
+
+For further details, see the [CodeXGLUE](https://github.com/microsoft/CodeXGLUE) benchmark dataset and open challenge for code intelligence.
+"""
+
+descr_string = 'The application takes as input the Python code for a function or a class, and generates a documentation string, or code comment, for it using CodeT5 fine-tuned for code2text generation. Code-to-text generation, or code summarization, is a CodeXGLUE generation (sequence-to-sequence) downstream task. CodeXGLUE stands for the General Language Understanding Evaluation benchmark *for code*, which includes diversified code intelligence downstream inference tasks and datasets.'
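
The commented-out body of pygen_func corresponds to a call against the Hugging Face Inference API. Below is a minimal sketch of how it could be filled in, assuming a POST method for CT5_METHOD, an `import requests as req` alias, and the standard {"inputs": ...} payload shape; none of these are fixed by this commit.

import json
import requests as req  # assumed alias behind the commented-out req.request(...) calls

CT5_URL = "https://api-inference.huggingface.co/models/nielsr/codet5-small-code-summarization-ruby"
CT5_METHOD = "POST"  # assumption: the Inference API is queried via POST

def pygen_func(nl_code_intent):
    # Send the intent (or a code snippet such as dfs_code) to the hosted CodeT5 model.
    payload = json.dumps({"inputs": nl_code_intent})  # assumed payload shape
    prediction = req.request(CT5_METHOD, CT5_URL, data=payload)
    # Decode the raw response, mirroring the commented-out lines in the diff above.
    answer = json.loads(prediction.content.decode("utf-8"))
    return str(answer)

A cold or gated model may return an error payload instead of a summary, so a caller may want to check prediction.status_code before decoding.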