Skip to content

Commit

Permalink
Wikidata (#3), 1603:1:51 (#9): 1603_3_12.py command line both allow g…
Browse files Browse the repository at this point in the history
…enerate raw SPARQL query and generate CSV/TSV directly
  • Loading branch information
fititnt committed Jan 22, 2022
1 parent 2faf531 commit afd9ffc
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 26 deletions.
7 changes: 5 additions & 2 deletions officinam/1603/3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ The bars on the logo contain the word "WIKI" encoded in Morse code.[38] It was c
- https://www.wikidata.org/wiki/Wikidata:In_one_page
- https://upload.wikimedia.org/wikipedia/commons/8/8d/Wikidata-in-brief-1.0.pdf
- https://sinaahmadi.github.io/posts/10-essential-sparql-queries-for-lexicographical-data-on-wikidata.html
- https://en.wikibooks.org/wiki/SPARQL

> TODO: it is also possible to generate diffs; see
> - https://www.wikidata.org/wiki/Help:Wikimedia_language_codes/lists/all
> - https://wikidata-todo.toolforge.org/sparql_rc.php?start=last+week&end=&user_lang=&sort_mode=last_edit&no_bots=1&skip_unchanged=1&sparql=SELECT%0A%20%20%3Fitem%20%0A%20%20%3Fc%20%28CONTAINS%28%3Fc%2C%22-%22%29%20as%20%3Fsubtag%29%0A%20%20%3Fwdlabelen%0A%20%20%28CONCAT%28%22%5B%5B%3Aen%3A%22%2C%3Fenwikipeda%2C%22%5Cu007C%22%2C%3Fenwikipeda%2C%22%5D%5D%22%29%20as%20%3Fwikipedia_link_en%29%0A%20%20%3Flang%0A%20%20%3Fwdlabelinlang%0A%20%20%28CONCAT%28%22%5B%5B%3A%22%2C%3Flang%2C%22%3A%22%2C%3Fwikipeda%2C%22%5Cu007C%22%2C%3Fwikipeda%2C%22%5D%5D%22%29%20as%20%3Fwikipedia_link%29%0AWHERE%0A%7B%0A%20%20VALUES%20%3Flang%20%7B%20%22fr%22%20%7D%0A%20%20%3Fitem%20wdt%3AP424%20%3Fc%20.%0A%20%20hint%3APrior%20hint%3ArangeSafe%20true%20.%0A%20%20MINUS%7B%3Fitem%20wdt%3AP31%20wd%3AQ47495990%7D%0A%20%20MINUS%7B%3Fitem%20wdt%3AP31%2Fwdt%3AP279%2A%20wd%3AQ14827288%7D%20%23exclude%20Wikimedia%20projects%0A%20%20MINUS%7B%3Fitem%20wdt%3AP31%2Fwdt%3AP279%2A%20wd%3AQ17442446%7D%20%23exclude%20Wikimedia%20internal%20stuff%0A%20%20OPTIONAL%20%7B%20%3Fitem%20rdfs%3Alabel%20%3Fwdlabelinlang%20.%20FILTER%28%20lang%28%3Fwdlabelinlang%29%3D%20%22fr%22%20%29%20%7D%0A%20%20OPTIONAL%20%7B%20%3Fitem%20rdfs%3Alabel%20%3Fwdlabelen%20.%20FILTER%28lang%28%3Fwdlabelen%29%3D%22en%22%29%20%7D%0A%20%20OPTIONAL%20%7B%20%5B%5D%20schema%3Aabout%20%3Fitem%20%3B%20schema%3AinLanguage%20%3Flang%3B%20schema%3AisPartOf%20%2F%20wikibase%3AwikiGroup%20%22wikipedia%22%20%3B%20schema%3Aname%20%3Fwikipeda%20%7D%20%0A%20%20OPTIONAL%20%7B%20%5B%5D%20schema%3Aabout%20%3Fitem%20%3B%20schema%3AinLanguage%20%22en%22%3B%20schema%3AisPartOf%20%2F%20wikibase%3AwikiGroup%20%22wikipedia%22%20%3B%20schema%3Aname%20%3Fenwikipeda%20%7D%20%0A%7D%0AORDER%20BY%20%3Fc

### [`1603:3.12:6`] /Speciālis collēctiōnī de Vicidata Proprietātī/
Expand All @@ -54,8 +59,6 @@ The bars on the logo contain the word "WIKI" encoded in Morse code.[38] It was c

### [`1603:3.1603:45:1`] //



- Fontem: [../45/1/1603_45_1.no1.tm.hxl.csv](../45/1/1603_45_1.no1.tm.hxl.csv)

Exemplum:
Expand Down
164 changes: 140 additions & 24 deletions officinam/999999999/0/1603_3_12.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@

# ./999999999/0/1603_3_12.py
# NUMERORDINATIO_BASIM="/external/ndata" ./999999999/0/1603_3_12.py
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-quod-sparql
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --wikidata-link
# printf "Q1065\nQ82151\n" | ./999999999/0/1603_3_12.py --actionem-sparql --query | ./999999999/0/1603_3_12.py --actionem-sparql --tsv > 999999/0/test.tsv

# TODO: https://sinaahmadi.github.io/posts/10-essential-sparql-queries-for-lexicographical-data-on-wikidata.html

Expand All @@ -43,6 +46,9 @@
Union
)

import urllib.parse
import requests

# from itertools import permutations
from itertools import product
# valueee = list(itertools.permutations([1, 2, 3]))
Expand Down Expand Up @@ -73,7 +79,7 @@

# a aa aaa
# printf "30160\n1830260\n109830360\n" | ./999999999/0/2600.py --actionem-decifram
# ./999999999/0/1603_3_12.py --actionem-sparql
# ./999999999/0/1603_3_12.py --actionem-quod-sparql


# SELECT ?item ?itemLabel
Expand Down Expand Up @@ -105,16 +111,18 @@ def __init__(self):
self.resultatum_separato = "\t"

# TODO: make this accept options from command line
self.qid = [
'Q1065',
'Q82151',
'Q125761',
'Q7809',
'Q386120',
'Q61923',
'Q7164',
# '...'
]
# self.qid = [
# 'Q1065',
# 'Q82151',
# 'Q125761',
# 'Q7809',
# 'Q386120',
# 'Q61923',
# 'Q7164',
# # '...'
# ]

self.qid = []

def _init_1613_1_51_datum(self):
# archivum = NUMERORDINATIO_BASIM + "/1613/1603_2_60.no1.tm.hxl.tsv"
Expand Down Expand Up @@ -153,6 +161,12 @@ def est_resultatum_separato(self, resultatum_separato: str):
self.resultatum_separato = resultatum_separato
return self

def est_wikidata_q(self, wikidata_codicem: str):
    """Register one Wikidata Q-code to be used when building the query.

    String codes are stripped of surrounding whitespace before being
    stored, so values read line by line from piped input (which may carry
    a trailing space, newline or carriage return) compare equal to their
    clean form and are not registered twice. Values that are empty after
    stripping are ignored, because they would otherwise end up as a bare
    ``wd:`` term when the codes are prefixed for the query.

    Args:
        wikidata_codicem: The Q-code to register, e.g. ``"Q1065"``.

    Returns:
        ``self``, so calls can be chained.
    """
    if isinstance(wikidata_codicem, str):
        wikidata_codicem = wikidata_codicem.strip()
        if not wikidata_codicem:
            # Blank line in the input: nothing to register.
            return self

    # Keep insertion order while avoiding duplicates.
    if wikidata_codicem not in self.qid:
        self.qid.append(wikidata_codicem)

    return self

# def query(self):
# term = """# https://en.wikiversity.org/wiki/Research_in_programming_Wikidata/Countries#List_of_countries
# # https://w.wiki/4ij4
Expand All @@ -179,6 +193,7 @@ def est_resultatum_separato(self, resultatum_separato: str):
# }
# }


def query(self):
qid = ['wd:' + x for x in self.qid if isinstance(x, str)]
# select = '?item ' + " ".join(self._query_linguam())
Expand All @@ -187,12 +202,17 @@ def query(self):
filter_otional = []
for pair in self.D1613_1_51_langpair:
select.append('?' + pair[1])
# filter_otional.append(
# '?item rdfs:label ?' +
# pair[1] + ' filter (lang(?' + pair[1] +
# ') = "' + pair[0] + '").'
# )
filter_otional.append(
'?item rdfs:label ?' +
'OPTIONAL { ?item rdfs:label ?' +
pair[1] + ' filter (lang(?' + pair[1] +
') = "' + pair[0] + '").'
') = "' + pair[0] + '"). }'
)
filter_otional_done = [' ' + x for x in filter_otional]
filter_otional_done = [' ' + x for x in filter_otional]
# print('select', self.D1613_1_51_langpair)
# print('select', select)
# print('filter_otional', filter_otional)
Expand All @@ -201,16 +221,16 @@ def query(self):
WHERE
{{
VALUES ?item {{ {qitems} }}
OPTIONAL {{
{langfilter}
}}
}}
""".format(
qitems=" ".join(qid),
select=" ".join(select),
langfilter="\n".join(filter_otional_done),
)
# """.format(qitems = " ".join(self.qid))

# [TRY IT ↗]()
return term

def exportatum_sparql(self):
Expand Down Expand Up @@ -280,6 +300,39 @@ def make_args(self, hxl_output=True):
nargs='?'
)

neo_codex.add_argument(
'--query',
help='Generate SPARQL query',
metavar='',
dest='query',
const=True,
nargs='?'
)
neo_codex.add_argument(
'--wikidata-link',
help='Generate query.wikidata.org link (from piped in query)',
metavar='',
dest='wikidata_link',
const=True,
nargs='?'
)
neo_codex.add_argument(
'--csv',
help='Generate TSV output (from piped in query)',
metavar='',
dest='csv',
const=True,
nargs='?'
)
neo_codex.add_argument(
'--tsv',
help='Generate TSV output (from piped in query)',
metavar='',
dest='tsv',
const=True,
nargs='?'
)

# neo_codex.add_argument(
# '--actionem-verbum-simplex',
# help='Do not generate the codes. Just calculate the full matrix ' +
Expand Down Expand Up @@ -514,14 +567,77 @@ def execute_cli(self, pyargs, stdin=STDIN, stdout=sys.stdout,
# return self.EXIT_OK

if self.pyargs.actionem_sparql:
systema_numerali = cs1603_3_12.exportatum_sparql(
# self.pyargs.tabulam_numerae_initiale,
# self.pyargs.tabulam_numerae_finale,
# self.pyargs.tabulam_numerae_gradus
)
tabulam_numerae = ['TODO']
# return self.output(tabulam_numerae)
return self.output(systema_numerali)
# print('oi')

if self.pyargs.query:
if stdin.isatty():
print("ERROR. Please pipe data in. \nExample:\n"
" cat data.txt | {0} --actionem-quod-sparql\n"
" printf \"Q1065\\nQ82151\\n\" | {0} --actionem-quod-sparql"
"".format(__file__))
return self.EXIT_ERROR

for line in sys.stdin:
codicem = line.replace('\n', ' ').replace('\r', '')
# TODO: deal with cases where a line has more than one Q-code
cs1603_3_12.est_wikidata_q(codicem)

quod_query = cs1603_3_12.exportatum_sparql()
# tabulam_numerae = ['TODO']
# return self.output(tabulam_numerae)
return self.output(quod_query)

if self.pyargs.wikidata_link:
if stdin.isatty():
print("ERROR. Please pipe data in. \nExample:\n"
" cat data.txt | {0} --actionem-sparql --query | {0} --actionem-sparql --wikidata-link\n"
" printf \"Q1065\\nQ82151\\n\" | {0} --actionem-sparql --query | {0} --actionem-sparql --wikidata-link"
"".format(__file__))
return self.EXIT_ERROR

full_query = []
for line in sys.stdin:
full_query.append(line)

wikidata_backend = "https://query.wikidata.org/#"
quod_query = wikidata_backend + \
urllib.parse.quote("".join(full_query).encode('utf8'))

print(quod_query)
return self.EXIT_OK

if self.pyargs.tsv or self.pyargs.csv:
if stdin.isatty():
print("ERROR. Please pipe data in. \nExample:\n"
" cat data.txt | {0} --actionem-sparql --query | {0} --actionem-sparql --tsv\n"
" printf \"Q1065\\nQ82151\\n\" | {0} --actionem-sparql --query | {0} --actionem-sparql --tsv"
"".format(__file__))
return self.EXIT_ERROR

full_query = []
for line in sys.stdin:
full_query.append(line)

sparql_backend = "https://query.wikidata.org/sparql"

# https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/en#Supported_formats

if self.pyargs.tsv:
headers = {'Accept': 'text/tab-separated-values'}
if self.pyargs.csv:
headers = {'Accept': 'text/csv'}

payload_query = "".join(full_query)
r = requests.get(sparql_backend, headers=headers, params={
'query': payload_query
})

# print('oi tsv', r.text)
# print('r.request.headers', r.request.headers)
# print('r.headers', r.headers)
print(r.text)
# print(r.content)
return self.EXIT_OK

# if self.pyargs.verbum_simplex:
# tabulam_multiplicatio = cs1603_3_12.quod_tabulam_multiplicatio()
Expand Down

0 comments on commit afd9ffc

Please sign in to comment.