73 lines
9.4 KiB
Python
Raw Normal View History

2022-10-13 12:27:33 -07:00
from typing import Counter
from elasticsearch import Elasticsearch
import json
import urllib.request
import os
import argparse
from timeit import default_timer as timer
import shutil
from functools import cache
import time
from tqdm import tqdm
from vars import url, api_key_1, api_key_2
import logging
from torchreid.utils import FeatureExtractor
from datetime import datetime
import numpy
'''
Takes in a query and adds the feature vectors into Elasticsearch.
The query can be dynamically adjusted based on the time frame. Currently feature vectors are only
used on
'''
# Sub-directory name under ./media/ that this script works with.
name = 'image'
input_path = f"./media/{name}/"

# Elasticsearch client; the endpoint and the API-key pair are imported from vars.
es = Elasticsearch(url, api_key=(api_key_1, api_key_2))

# Load the stored query. The context manager closes the file handle as soon as
# the JSON has been parsed (the original left it open for the whole run).
with open('query.json') as f:
    search_query = json.load(f)

# Time window bounds as ISO-8601 strings: the end is "now" (naive local time),
# the start is a fixed timestamp.
global_end_time = datetime.now().isoformat()
global_start_time = '2022-10-13T07:17:15.892850'
script_query = {
2022-10-17 11:32:10 -07:00
"query": {
"script_score": {
"query": {
"exists": {
"field": "person_vectors"}
},
"script": {
"source": "cosineSimilarity(params.queryVector, 'person_vectors') + 1.0",
"params": {
"queryVector": [0.7190567851066589, 0.008577888831496239, 0.44059064984321594, 0, 0.2993614673614502, 0, 1.5982489585876465, 0.7433022856712341, 0.4853684902191162, 0.23498082160949707, 0.3664013147354126, 0.7315822839736938, 1.0979870557785034, 0, 0.9605587124824524, 0.8897871971130371, 1.6408390998840332, 0.7487296462059021, 0.885391354560852, 1.2531311511993408, 0.892846941947937, 0.24019311368465424, 0.32364320755004883, 0, 2.53128719329834, 0.10714412480592728, 0, 0.5916593074798584, 0.08135084807872772, 0, 1.5359987020492554, 0, 0.2504684031009674, 1.7827059030532837, 0, 1.4176852703094482, 0.6488818526268005, 1.5497767925262451, 1.164306640625, 3.3588039875030518, 0.25189658999443054, 1.3223854303359985, 0.03131337836384773, 0, 0, 0.48733654618263245, 0.13395749032497406, 1.439650535583496, 1.9655312299728394, 0.18889208137989044, 0, 0, 2.0258777141571045, 0.24703159928321838, 0, 2.243384599685669, 1.1586835384368896, 0.23071441054344177, 0.7587310075759888, 0, 0.610011100769043, 0.4172978699207306, 1.6066769361495972, 1.1523643732070923, 0.06450517475605011, 0.5152580142021179, 0.0029307412914931774, 0.6792735457420349, 0, 0, 0, 0.891771137714386, 0.8577366471290588, 0.48661378026008606, 0, 0.9169254302978516, 0.5252501964569092, 0.636182963848114, 0.28452324867248535, 0.00624100724235177, 1.779815673828125, 0.6684868335723877, 0.35277050733566284, 0.40771764516830444, 0.46059921383857727, 0.08505523204803467, 0.26748353242874146, 0, 1.0608913898468018, 0.6663370132446289, 1.7243709564208984, 0.40593674778938293, 0, 0.1447577327489853, 1.0585582256317139, 1.1423757076263428, 0.20860086381435394, 0, 0.011838573962450027, 0.17520694434642792, 0, 0.04941047355532646, 0, 0.2007238119840622, 2.2279510498046875, 1.5872749090194702, 0, 0.11534080654382706, 0, 1.2216063737869263, 0.05639352649450302, 1.609881043434143, 0, 0.0850832536816597, 0.8145129084587097, 0.3628203570842743, 0.07895816117525101, 0, 0.4664478302001953, 0.8357388973236084, 0, 
0.5207036733627319, 0.3278266489505768, 1.1790447235107422, 0, 0, 0.09382054209709167, 0.45543596148490906, 0, 0.7800145149230957, 0, 0, 0, 0.2481914609670639, 0.9727578163146973, 0, 0.8668861389160156, 0.42392200231552124, 2.2217330932617188, 0.042975522577762604, 0, 2.8870198726654053, 1.892953634262085, 1.0418862104415894, 1.5774325132369995, 1.7152574062347412, 0, 1.016575813293457, 0.4207040071487427, 0.22386038303375244, 0.5424627065658569, 0, 1.9843037128448486, 1.6185767650604248, 0.7700446248054504, 0.5901507139205933, 1.061691403388977, 0.74217689037323, 0, 2.4153037071228027, 0.27418941259384155, 0.783516526222229, 0, 0.5848633050918579, 1.2426505088806152, 1.0478225946426392, 1.3246703147888184, 0.6793762445449829, 0.9392397403717041, 0, 0.9709073901176453, 0.3652898669242859, 1.3471348285675049, 0.2562827169895172, 0, 0.581573486328125, 0, 0.9481611847877502, 0.507270872592926, 0.7533062696456909, 1.4821592569351196, 0.9436051249504089, 0.35450828075408936, 0, 0.19321130216121674, 0.23883885145187378, 0.014973913319408894, 0.36931151151657104, 1.2540863752365112, 0, 1.3181819915771484, 0, 0, 0.6472974419593811, 0, 0, 1.3794875144958496, 2.059051275253296, 0.5884737372398376, 1.808712124824524, 0, 0.8089750409126282, 0.62647545337677, 0, 1.665348768234253, 1.9968191385269165, 0.3527408540248871, 0.0035672676749527454, 1.8799611330032349, 0.9392209053039551, 0.07137293368577957, 0.36014267802238464, 0.7252545952796936, 0, 0.5226032137870789, 0.09756691008806229, 0, 0.055440325289964676, 0.9390162229537964, 1.2112655639648438, 0, 0.8993765711784363, 0, 0.878217339515686, 3.6813886165618896, 0, 0, 2.8264458179473877, 0.24219828844070435, 0, 0, 1.0634945631027222, 0, 0, 0.8261040449142456, 0.9613623023033142, 1.9318516254425049, 0, 1.4986813068389893, 0.2156262844800949, 0.2666844427585602, 0, 0.5157204270362854, 0, 1.5937849283218384, 0, 0, 0, 3.0056838989257812, 0, 0, 3.141068935394287, 0, 1.8621559143066406, 0.22295939922332764, 0, 0.6911113262176514, 0, 
1.8179839849472046, 1.6182421445846558, 1.5211284160614014, 0.8695023059844971, 1.361924409866333, 0.773
2022-10-13 12:27:33 -07:00
}
2022-10-17 11:32:10 -07:00
}
2022-10-13 12:27:33 -07:00
}
2022-10-17 11:32:10 -07:00
}
2022-10-13 12:27:33 -07:00
}
2022-10-17 11:32:10 -07:00
2022-10-13 12:27:33 -07:00
# Run the script-score query over all matching indices; at most 20 hits come back.
json_info = es.search(index="snl-ghostrunner-*", body=script_query, size=20)
elastic_docs = json_info["hits"]["hits"]

# Keep a copy of the raw hits on disk.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(elastic_docs, f, ensure_ascii=False, indent=4)

# exist_ok=True replaces the separate isdir() check: one call, and no failure
# if the directory appears between the check and the creation.
os.makedirs(input_path, exist_ok=True)

# Download the image of every hit; the file name is the hit's score followed
# by its document id.
for doc in tqdm(elastic_docs):
    print(doc)
    url_of_image = str(doc['_source']['s3_presigned'])
    score = doc['_score']
    instances_id = doc['_id']
    full_file_name = os.path.join(input_path, f"{score}{instances_id}.jpg")
    urllib.request.urlretrieve(url_of_image, full_file_name)