Vadim Markovtsev, source{d}.
Vadim Markovtsev
@vadimlearning
class foobar:
    def connecttoserver(self):
        myserverhost = globalconfig.server.host
class FooBar:
    def connect_to_server(self):
        myServerHost = globalConfig.server.host
>>> from sourced.engine import Engine
>>> engine = Engine(spark, "/path/to/siva/files", "siva")
>>> engine.repositories.references.head_ref \
.commits.tree_entries.blobs \
.classify_languages() \
.select("blob_id", "path", "lang") \
.show()
>>> engine.repositories.references.head_ref \
.commits.tree_entries.blobs \
.classify_languages() \
.filter('lang = "Python"') \
.extract_uasts() \
.query_uast('//*[@roleIdentifier]') \
.extract_tokens("result", "tokens") \
.select("blob_id", "path", "tokens")
\( \begin{split} V_1 \Leftrightarrow & \,\texttt{"foo"} \\ \\ V_2 \Leftrightarrow & \,\texttt{"bar"} \\ \\ V_3 \Leftrightarrow & \,\texttt{"integrate"} \end{split} \)
\( \operatorname{distance}(V_1, V_2) < \operatorname{distance}(V_1, V_3) \)
$$ \operatorname{distance}(V_i, V_j) = \arccos \frac{V_i \cdot V_j}{\left\lVert V_i \right\rVert \left\lVert V_j \right\rVert} $$
$$ V_i \cdot V_j = \mathrm{PMI}_{ij} = \log\frac{C_{ij} \sum_{k,l = 1}^N C_{kl}}{\sum_{k = 1}^N C_{ik}\sum_{k = 1}^N C_{jk}} $$
“bug” - “test” + “expect” = “suppress”
“database” - “query” + “tune” = “settings”
“send” - “receive” + “pop” = “push”
>>> from sourced.ml.models import Id2Vec
>>> m = Id2Vec().load()
>>> print(m.embeddings[m["foo"]])