Oracle向量数据库操作的一些随手笔记
mhr18 2024-12-15 11:58 32 浏览 0 评论
1. Basic Demo:
| c(2,6). . b(5,6)
| .
| .
| a(2,2)
|_________________________
|b-a| = sqrt( (5-2)^2 + (6-2)^2 ) = 5
-- Euclidean distance between the points (2,2) and (5,6).
SELECT
    VECTOR_DISTANCE(
        vector('[2,2]'),
        vector('[5,6]'),
        EUCLIDEAN
    ) AS distance;
How about COSINE?
-- Store of embedded documents: collection name, embedding vector, document
-- text and an optional source reference (src is the only nullable column).
CREATE TABLE IF NOT EXISTS embedding_store_hysun (
    collection_name  VARCHAR2(200)       NOT NULL,
    embedding        VECTOR(*, FLOAT32)  NOT NULL,
    doc              CLOB                NOT NULL,
    src              VARCHAR2(500)
);
-- ############################ In database embedding ############################
-- EXEC DBMS_VECTOR.DROP_ONNX_MODEL(model_name => 'doc_model', force => true);
-- SQL> grant DB_DEVELOPER_ROLE to vector;
SQL> grant create mining model to pocuser;
Grant succeeded.
SQL> create or replace directory HYSUN_DUMP as '/u01/ords_sw/hysun_dump';
Directory HYSUN_DUMP created.
SQL> grant read on directory HYSUN_DUMP to pocuser;
Grant succeeded.
-- Load the ONNX file from the HYSUN_DUMP directory object as the in-database
-- model hysun_bge_zh_model.
BEGIN
    DBMS_VECTOR.LOAD_ONNX_MODEL(
        directory  => 'HYSUN_DUMP',
        file_name  => 'bge-base-zh-v1.5.onnx',
        model_name => 'hysun_bge_zh_model',
        metadata   => JSON('{"function" : "embedding", "embeddingOutput" : "embedding"}')
    );
END;
/
-- List the mining models visible in USER_MINING_MODELS.
select
    model_name,
    mining_function,
    algorithm,
    algorithm_type,
    model_size
from user_mining_models;
SQL> INSERT INTO embedding_store_hysun select 'DB_EMBED_TEST0', VECTOR_EMBEDDING(hysun_bge_zh_model USING 'Minimum Age to Get a Licence The minimum age to get a licence. minimum age' as input), 'Minimum Age to Get a Licence The minimum age to get a licence. minimum age', '/home/hysunhe/projects/oracle_vectordb/source_data/cdc_poc/QA_1.txt' from dual;
1 row inserted.
SQL> INSERT INTO embedding_store_hysun select 'DB_EMBED_TEST0', VECTOR_EMBEDDING(hysun_bge_zh_model USING 'Minimum Requirements for Enrolment The list of requirements/ enrolment prerequisites that needs to be met before enrolment. class 3/3a, Class 3A, class 2B, class 2, minimum requirements, enrolment' as input), 'Minimum Requirements for Enrolment The list of requirements/ enrolment prerequisites that needs to be met before enrolment. class 3/3a, Class 3A, class 2B, class 2, minimum requirements, enrolment', '/home/hysunhe/projects/oracle_vectordb/source_data/cdc_poc/QA_2.txt' from dual;
1 row inserted.
SQL> SELECT VECTOR_EMBEDDING(hysun_bge_zh_model USING 'mininum age to get a license' as input) AS embedding;
-- Three rows of collection DB_EMBED_TEST0 with the smallest cosine distance
-- to the embedding of the query text.
select
    s.collection_name,
    s.embedding,
    s.doc,
    s.src,
    vector_distance(
        s.embedding,
        vector_embedding(hysun_bge_zh_model using 'mininum age to get a license' as input),
        cosine
    ) as distance
from embedding_store_hysun s
where s.collection_name = 'DB_EMBED_TEST0'
order by distance
fetch first 3 rows only;
-- ######################## In database embedding end ########################
-- ### Index:
-- Inspect and size the vector memory pool.
show parameter vector_memory_size;
-- VECTOR_MEMORY_SIZE expects a size value (for example 1G), not ON/OFF.
-- NOTE(review): 1G is a placeholder; pick a size that fits the SGA. If the
-- instance rejects a dynamic change, use SCOPE=SPFILE and restart — confirm
-- for your CDB/PDB setup.
ALTER SYSTEM SET vector_memory_size = 1G SCOPE=BOTH;
SELECT value FROM V$PARAMETER WHERE name='sga_target'; -- (max vector_memory_size = 70% SGA)
-- Allocated and used megabytes of the vector pool, per container.
SELECT CON_ID, sum(alloc_bytes) / 1024 / 1024 FROM V$VECTOR_MEMORY_POOL GROUP BY CON_ID;
SELECT CON_ID, sum(USED_BYTES) / 1024 / 1024 FROM V$VECTOR_MEMORY_POOL GROUP BY CON_ID;
-- ############################################################
-- In-Memory Neighbor Graph Vector Index (HNSW)
-- ############################################################
-- Demo table: one row per galaxy with a short description and an embedding.
create table galaxies (
    id        number,
    name      varchar2(50),
    doc       varchar2(500),
    embedding vector
);
-- Seed rows. Column lists are spelled out so the inserts keep working if the
-- table gains or reorders columns; the vector text literals are converted to
-- the VECTOR column type on insert.
insert into galaxies (id, name, doc, embedding) values (1, 'M31', 'Messier 31 is a barred spiral galaxy in the Andromeda constellation which has a lot of barred spiral galaxies.', '[0,2,2,0,0]');
insert into galaxies (id, name, doc, embedding) values (2, 'M33', 'Messier 33 is a spiral galaxy in the Triangulum constellation.', '[0,0,1,0,0]');
insert into galaxies (id, name, doc, embedding) values (3, 'M58', 'Messier 58 is an intermediate barred spiral galaxy in the Virgo constellation.', '[1,1,1,0,0]');
insert into galaxies (id, name, doc, embedding) values (4, 'M63', 'Messier 63 is a spiral galaxy in the Canes Venatici constellation.', '[0,0,1,0,0]');
insert into galaxies (id, name, doc, embedding) values (5, 'M77', 'Messier 77 is a barred spiral galaxy in the Cetus constellation.', '[0,1,1,0,0]');
insert into galaxies (id, name, doc, embedding) values (6, 'M91', 'Messier 91 is a barred spiral galaxy in the Coma Berenices constellation.', '[0,1,1,0,0]');
insert into galaxies (id, name, doc, embedding) values (7, 'M49', 'Messier 49 is a giant elliptical galaxy in the Virgo constellation.', '[0,0,0,1,1]');
insert into galaxies (id, name, doc, embedding) values (8, 'M60', 'Messier 60 is an elliptical galaxy in the Virgo constellation.', '[0,0,0,0,1]');
insert into galaxies (id, name, doc, embedding) values (9, 'NGC1073', 'NGC 1073 is a barred spiral galaxy in Cetus constellation.', '[0,1,1,0,0]');
-- Names of the three galaxies with the smallest cosine distance to [0,1,1,0,0].
select g.name
from galaxies g
order by vector_distance(g.embedding, to_vector('[0,1,1,0,0]'), cosine)
fetch first 3 rows only;
-- Approximate top-4 by cosine distance to [0,1,1,0,0], distance rounded to 2 decimals.
select
    g.name,
    round(vector_distance(g.embedding, to_vector('[0,1,1,0,0]'), cosine), 2) as distance
from galaxies g
order by distance
fetch approximate first 4 rows only;
-- Optional clause that can be appended to the FETCH: WITH TARGET ACCURACY 90
-- Record the execution plan of the approximate top-4 query ...
explain plan for
select
    g.name,
    vector_distance(g.embedding, to_vector('[0,1,1,0,0]'), cosine) as distance
from galaxies g
order by distance
fetch approximate first 4 rows only;
-- ... and print it from PLAN_TABLE with the 'all' format.
select plan_table_output
from table(dbms_xplan.display('plan_table', null, 'all'));
-- Variant 1: HNSW (in-memory neighbor graph) index with default parameters.
CREATE VECTOR INDEX galaxies_hnsw_idx ON galaxies (embedding)
    ORGANIZATION INMEMORY NEIGHBOR GRAPH
    DISTANCE COSINE
    WITH TARGET ACCURACY 95;
-- Both variants use the same index name, so the first one has to be dropped
-- before the second CREATE can succeed.
DROP INDEX galaxies_hnsw_idx;
-- Variant 2: same index with explicit HNSW build parameters.
CREATE VECTOR INDEX galaxies_hnsw_idx ON galaxies (embedding)
    ORGANIZATION INMEMORY NEIGHBOR GRAPH
    DISTANCE COSINE
    WITH TARGET ACCURACY 90
    PARAMETERS (type HNSW, neighbors 40, efconstruction 500);
-- Approximate top-4 by cosine distance to [0,1,1,0,0], skipping NGC1073 and
-- asking for a target accuracy of 90.
select
    g.name,
    round(vector_distance(g.embedding, to_vector('[0,1,1,0,0]'), cosine), 3) as distance
from galaxies g
where g.name <> 'NGC1073'
order by distance
fetch approximate first 4 rows only with target accuracy 90;
-- Remove the HNSW index again.
drop index galaxies_hnsw_idx;
-- ##############################################################
-- Neighbor Partition Vector Index (IVF)
-- ##############################################################
-- Variant 1: IVF (neighbor partitions) index with default parameters.
CREATE VECTOR INDEX galaxies_ivf_idx ON galaxies (embedding)
    ORGANIZATION NEIGHBOR PARTITIONS
    DISTANCE COSINE
    WITH TARGET ACCURACY 95;
-- Both variants use the same index name, so the first one has to be dropped
-- before the second CREATE can succeed.
DROP INDEX galaxies_ivf_idx;
-- Variant 2: same index with an explicit partition count.
CREATE VECTOR INDEX galaxies_ivf_idx ON galaxies (embedding)
    ORGANIZATION NEIGHBOR PARTITIONS
    DISTANCE COSINE
    WITH TARGET ACCURACY 90
    PARAMETERS (type IVF, neighbor partitions 100);
-- The APPROX and APPROXIMATE keywords are optional. If they are omitted while
-- connected to an Autonomous Database Serverless (ADB-S) instance, an
-- approximate search using a vector index is attempted if one exists.
-- Accuracy report: ask DBMS_VECTOR for the accuracy of GALAXIES_IVF_IDX for
-- one query vector and print the returned text.
SET SERVEROUTPUT ON
DECLARE
    l_report VARCHAR2(128);
BEGIN
    l_report := dbms_vector.index_accuracy_query(
        owner_name      => 'POCUSER',
        index_name      => 'GALAXIES_IVF_IDX',
        qv              => to_vector('[0,1,1,0,0]'),
        top_k           => 10,
        target_accuracy => 95
    );
    dbms_output.put_line(l_report);
END;
/
-- Index detail: allow pocuser to read the vector index dictionary table, then
-- pretty-print the stored parameters of GALAXIES_IVF_IDX.
grant read on VECSYS.VECTOR$INDEX to pocuser;
select
    json_serialize(IDX_PARAMS returning varchar2 pretty)
from VECSYS.VECTOR$INDEX
where IDX_NAME = 'GALAXIES_IVF_IDX';
-- Public database link to the remote PDB.
-- NOTE(review): the statement embeds a literal password and host address;
-- replace them with environment-specific values before reuse.
CREATE PUBLIC DATABASE LINK LinkToLA1
    CONNECT TO vectordemo IDENTIFIED BY "welcome1"
    USING '146.235.233.91:1521/pdb1.sub08030309530.justinvnc1.oraclevcn.com';
-- Check the link definition as seen from this schema.
select
    OWNER,
    DB_LINK,
    USERNAME,
    VALID,
    HOST
from all_db_links;
-- Turn off global name enforcement for this session, then probe the link.
alter session set global_names = false;
select 1 from dual@LINKTOLA1;
-- #### Memo
-- Directory object for the source documents (pocuser needs CREATE ANY DIRECTORY).
grant create any directory to pocuser;
create directory RAG_DOC_DIR as '/u01/hysun/rag_docs';
-- Raw files: file name plus binary content.
create table RAG_FILES (
    file_name     varchar2(500),
    file_content  BLOB
);
-- Table with id, name, doc text and an embedding vector.
create table RAG_INDB_PIPELINE (
    id         number,
    name       varchar2(50),
    doc        varchar2(500),
    embedding  VECTOR
);
-- Read one file from the RAG_DOC_DIR directory object into a temporary BLOB
-- and store it in RAG_FILES. The temporary LOB is freed and the BFILE is
-- closed on both the success path and the error path.
DECLARE
    l_file_name    VARCHAR2(500) := 'Oracle向量数据库_lab.pdf';
    l_src          BFILE := BFILENAME('RAG_DOC_DIR', l_file_name);
    l_content      BLOB;
    l_src_open     BOOLEAN := FALSE;  -- TRUE while the BFILE is open
    l_tmp_created  BOOLEAN := FALSE;  -- TRUE while the temporary BLOB exists
BEGIN
    DBMS_LOB.OPEN(l_src, DBMS_LOB.LOB_READONLY);
    l_src_open := TRUE;

    DBMS_LOB.CREATETEMPORARY(l_content, TRUE, DBMS_LOB.SESSION);
    l_tmp_created := TRUE;

    -- Copy the whole file into the temporary BLOB.
    DBMS_LOB.LOADFROMFILE(l_content, l_src, DBMS_LOB.GETLENGTH(l_src));

    DBMS_LOB.CLOSE(l_src);
    l_src_open := FALSE;

    INSERT INTO RAG_FILES (file_name, file_content) VALUES (l_file_name, l_content);
    COMMIT;

    -- The row holds its own copy now; release the session-duration temporary LOB.
    DBMS_LOB.FREETEMPORARY(l_content);
    l_tmp_created := FALSE;
EXCEPTION
    WHEN OTHERS THEN
        -- Release whatever was acquired, then let the caller see the error.
        IF l_src_open THEN
            DBMS_LOB.CLOSE(l_src);
        END IF;
        IF l_tmp_created THEN
            DBMS_LOB.FREETEMPORARY(l_content);
        END IF;
        RAISE;
END;
/
-- Single-statement load: convert the BFILE to a BLOB with to_blob() in the INSERT itself.
insert into RAG_FILES (file_name, file_content)
values (
    'oracle-vector-lab',
    to_blob(bfilename('RAG_DOC_DIR', 'Oracle向量数据库_lab.pdf'))
);
commit;
-- Length of each stored BLOB as reported by DBMS_LOB.getLength.
select DBMS_LOB.getLength(FILE_CONTENT)
from RAG_FILES;
-- Recreate the chunk table. IF EXISTS keeps the first run (when the table is
-- not there yet) from failing on the DROP.
drop table if exists rag_doc_chunks purge;
create table rag_doc_chunks (
    doc_id           varchar2(500),
    chunk_id         number,
    chunk_data       varchar2(4000),
    chunk_embedding  vector
);
-- Pipeline stages used below:
--   utl_to_text:       PDF -> TEXT
--   utl_to_chunks:     TEXT -> CHUNKS
--   utl_to_embeddings: CHUNKS -> VECTORS
-- Each element produced by utl_to_embeddings is read as JSON and split into
-- embed_id / embed_data / embed_vector by JSON_TABLE.
-- The target column list is explicit so the insert does not depend on the
-- physical column order of rag_doc_chunks.
insert into rag_doc_chunks (doc_id, chunk_id, chunk_data, chunk_embedding)
select
    dt.file_name                as doc_id,
    et.embed_id                 as chunk_id,
    et.embed_data               as chunk_data,
    to_vector(et.embed_vector)  as chunk_embedding
from
    rag_files dt,
    dbms_vector_chain.utl_to_embeddings(
        dbms_vector_chain.utl_to_chunks(
            dbms_vector_chain.utl_to_text(dt.file_content),
            json('{"normalize":"all"}')
        ),
        json('{"provider":"database", "model":"mydoc_model"}')
    ) t,
    JSON_TABLE(
        t.column_value,
        '$[*]' COLUMNS (
            embed_id     NUMBER         PATH '$.embed_id',
            embed_data   VARCHAR2(4000) PATH '$.embed_data',
            embed_vector CLOB           PATH '$.embed_vector'
        )
    ) et;
commit;
-- Same text -> chunks -> embeddings pipeline, with explicit chunking options
-- passed to utl_to_chunks (see the JSON literal for the exact settings).
-- The target column list is explicit so the insert does not depend on the
-- physical column order of rag_doc_chunks.
-- The JSON literal spans several lines; its continuation lines are kept
-- flush-left so the literal text is unchanged.
insert into rag_doc_chunks (doc_id, chunk_id, chunk_data, chunk_embedding)
select
    dt.file_name                as doc_id,
    et.embed_id                 as chunk_id,
    et.embed_data               as chunk_data,
    to_vector(et.embed_vector)  as chunk_embedding
from
    rag_files dt,
    dbms_vector_chain.utl_to_embeddings(
        dbms_vector_chain.utl_to_chunks(
            dbms_vector_chain.utl_to_text(dt.file_content),
            JSON('{ "by":"words",
"max":"240",
"overlap":"15",
"split":"recursively",
"language":"SIMPLIFIED CHINESE",
"normalize":"all" }')
        ),
        json('{"provider":"database", "model":"mydoc_model"}')
    ) t,
    JSON_TABLE(
        t.column_value,
        '$[*]' COLUMNS (
            embed_id     NUMBER         PATH '$.embed_id',
            embed_data   VARCHAR2(4000) PATH '$.embed_data',
            embed_vector CLOB           PATH '$.embed_vector'
        )
    ) et;
commit;
-- Preview the chunks produced for each stored file.
-- The stored content is a binary document, so it is converted with
-- utl_to_text first (as the insert pipelines in this file do) rather than
-- with TO_CLOB, which would hand the raw bytes to the chunker.
-- The JSON literal's continuation lines are kept flush-left so the literal
-- text is unchanged.
select
    dbms_vector_chain.utl_to_chunks(
        dbms_vector_chain.utl_to_text(f.file_content),
        JSON('{ "by":"words",
"max":"240",
"overlap":"15",
"split":"recursively",
"language":"SIMPLIFIED CHINESE",
"normalize":"all" }')
    ) as chunks
from RAG_FILES f;
-- Embed a test sentence through the OCIGenAI provider, using the stored
-- credential OCI_GENAI_CRED_FOR_APEX. The JSON literal is kept flush-left so
-- its text is unchanged.
select
    dbms_vector.utl_to_embedding(
        'This is a test',
        json('{
"provider": "OCIGenAI",
"credential_name": "OCI_GENAI_CRED_FOR_APEX",
"url": "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/embedText",
"model": "cohere.embed-multilingual-v3.0"
}')
    ) as embedding
from dual;
-- Embed a test sentence with the in-database provider and the model doc_model.
-- NOTE(review): no statement in these notes loads a model named doc_model —
-- confirm it exists in the target schema. The JSON literal is kept flush-left
-- so its text is unchanged.
select
    dbms_vector.utl_to_embedding(
        'This is a test',
        json('{
"provider": "database",
"model": "doc_model"
}')
    ) as embedding
from dual;
-- Directory object for the ONNX model files.
create or replace directory MODELS_DIR as '/u01/hysun/models';
-- Drop mydoc_model before loading it again.
BEGIN
    DBMS_VECTOR.DROP_ONNX_MODEL(model_name => 'mydoc_model', force => true);
END;
/
-- Disabled alternative: the same load with explicit metadata.
-- BEGIN
-- DBMS_VECTOR.LOAD_ONNX_MODEL(
-- directory => 'MODELS_DIR',
-- file_name => 'bge-base-zh-v1.5.onnx',
-- model_name => 'mydoc_model',
-- metadata => JSON('{"function" : "embedding", "embeddingOutput" : "embedding", "input":{"input": ["DATA"]}}')
-- );
-- END;
-- /
-- Active load: no metadata argument is passed.
BEGIN
    DBMS_VECTOR.LOAD_ONNX_MODEL(
        directory  => 'MODELS_DIR',
        file_name  => 'bge-base-zh-v1.5.onnx',
        model_name => 'mydoc_model'
    );
END;
/
-- Smoke test: embed a short string with mydoc_model.
SELECT vector_embedding(mydoc_model using 'hello' as data);
-- Closest chunk (approximate search) to the embedded question, by cosine distance.
select
    c.chunk_data,
    vector_distance(
        c.chunk_embedding,
        vector_embedding(mydoc_model using '本次实验的先决条件' as data),
        cosine
    ) as distance
from rag_doc_chunks c
order by distance
fetch approx first 1 rows only;
-- Prerequisite noted by the author: grant CREATE CREDENTIAL to the user first.
-- Registers the credential LAB_OPENAI_CRED with the access token "EMPTY".
BEGIN
    DBMS_VECTOR_CHAIN.CREATE_CREDENTIAL(
        credential_name => 'LAB_OPENAI_CRED',
        params          => json('{ "access_token": "EMPTY" }')
    );
END;
/
-- Send one prompt to the chat-completions endpoint configured in the JSON
-- parameters, using the credential LAB_OPENAI_CRED. The JSON literal is kept
-- flush-left so its text is unchanged.
select
    dbms_vector_chain.utl_to_generate_text(
        'Oracle 向量数据库是什么',
        json('{
"provider": "openai",
"credential_name": "LAB_OPENAI_CRED",
"url": "http://146.235.226.110:8098/v1/chat/completions",
"model": "Qwen2-7B-Instruct"
}')
    )
from dual;
-- For each of the 3 chunks closest to the question (approximate search, cosine
-- distance), return the chunk text together with the text generated for it.
-- utl_to_generate_text returns a single CLOB value, so it is called in the
-- select list rather than used as a row source in FROM; the statement is also
-- terminated so that a following block is not parsed as part of it.
-- The JSON literal is kept flush-left so its text is unchanged.
select
    dt.chunk_data,
    dbms_vector_chain.utl_to_generate_text(
        dt.chunk_data,
        json('{
"provider": "openai",
"credential_name": "LAB_OPENAI_CRED",
"url": "http://146.235.226.110:8098/v1/chat/completions",
"model": "Qwen2-7B-Instruct"
}')
    ) as rag
from (
    select
        chunk_data
    from rag_doc_chunks
    order by VECTOR_DISTANCE(chunk_embedding, VECTOR_EMBEDDING(mydoc_model USING '本次实验的先决条件' as data), COSINE)
    FETCH APPROX FIRST 3 ROWS ONLY
) dt;
-- Minimal RAG flow in one anonymous block: retrieve the closest chunks, build
-- a prompt from them plus the question, call the LLM, print the answer.
-- Output goes through dbms_output, so SERVEROUTPUT must be on in the client.
declare
    l_question    varchar2(500) := '本次实验的先决条件';
    l_context     CLOB;  -- retrieved chunks, one per line
    l_input       CLOB;  -- full prompt sent to the model
    l_rag_result  CLOB;  -- text returned by the model
begin
    -- Step 1: retrieve the content most similar to the question from the vector store.
    for rec in (
        select
            chunk_data
        from rag_doc_chunks
        order by VECTOR_DISTANCE(chunk_embedding, VECTOR_EMBEDDING(mydoc_model USING l_question as data), COSINE)
        FETCH APPROX FIRST 3 ROWS ONLY
    ) loop
        l_context := l_context || rec.chunk_data || chr(10);
    end loop;

    -- Step 2: prompt engineering — combine the retrieved content and the user
    -- question into the model input. Line feeds in the context are replaced by
    -- the two-character sequence \n, exactly as in the original prompt text.
    l_input := '你是一个诚实且专业的数据库知识问答助手,请仅仅根据提供的上下文信息内容,回答用户的问题,且不要试图编造答案。\n 以下是上下文信息:' || replace(l_context, chr(10), '\n') || '\n请用英文回答用户问题:' || l_question;

    -- Step 3: call the LLM to generate the RAG result. The function is called
    -- directly instead of through a one-row cursor over dual. The JSON literal
    -- is kept flush-left so its text is unchanged.
    l_rag_result := dbms_vector_chain.utl_to_generate_text(
        l_input,
        json('{
"provider": "openai",
"credential_name": "LAB_OPENAI_CRED",
"url": "http://146.235.226.110:8098/v1/chat/completions",
"model": "Qwen2-7B-Instruct"
}')
    );
    dbms_output.put_line('*** RAG Result: ' || l_rag_result);

    -- If the raw chat-completions JSON ever needs to be parsed by hand, the
    -- author's earlier approach was apex_json.parse into an apex_json.t_values
    -- variable followed by apex_json.get_varchar2 on the path
    -- 'choices[%d].message.content' with p0 => 1. The unused declarations for
    -- that were removed so the block compiles without APEX installed.
end;
/
```bash
srvctl stop instance -d ai23 -i ai232 -force
srvctl status database -d ai23
srvctl start instance -d ai23 -i ai232
```
相关推荐
- AlmaLinux 9.6发布:升级工具、初步支持IBM Power虚拟化技术
-
IT之家5月21日消息,科技媒体linuxiac昨日(5月20日)发布博文,报道称代号为SageMargay的AlmaLinux9.6发行版已上线,距上一版本9.5发...
- Java最新学习路线,系统全面,零基础适用
-
首先,我个人比较推崇的学习方法是:先学java前段,也就是HTML,css,js,因为学习java以后肯定是往javaee方向发展的,学习完前端,在学习后端很多东西比计较容易理解!其中J2SE是关键...
- 深入理解数据库事务(数据库事务处理的理解)
-
Transaction作为关系型数据库的核心组成,在数据安全方面有着非常重要的作用,本文会一步步解析事务的核心特性,以获得对事务更深的理解。什么是事务数据库几乎是所有系统的核心模块,它将数据有条理地保...
- IvorySQL 4.4 发布(1044mysql)
-
IvorySQL4.4已于2025年3月10日正式发布。新版本全面支持PostgreSQL17.4,新增多项新功能,并修复了已知问题。增强功能PostgreSQL17.3增强功...
- Oracle 与 Google Cloud 携手大幅扩展多云服务
-
据DCD4月10日报道,甲骨文(Oracle)与谷歌云(GoogleCloud)深化合作,全力扩展多云产品。双方计划为OracleDatabaseGoogleCloud解决方案新增11...
- Izzi 利用 Oracle 云提高计费效率和客户体验
-
据thefastmode网5月2日报道,墨西哥电信运营商Izzi宣布采用Oracle云基础设施(OCI),对其业务支持系统(BSS)进行现代化改造增强客户体验,已经成功完成。通过在OCI上运行...
- 好莱坞群星也有明星脸?硅谷科技名人本尊分身比一比
-
假如有部电影齐聚了众科技名人角色,如同许多好莱坞大牌卡司所共同主演的《瞒天过海》(Ocean’sEleven)那样,演出彼此在商场上竞逐、或共同对抗外来竞争捍卫硅谷的故事,更在剧中有不少对手戏,会不...
- 澳大利亚Find My iPhone被黑 多人被黑客锁机
-
FindMyiPhone本来是一个用于协助找回被盗手机的好工具,但是现在,澳洲的苹果用户发现他们的FindMyiPhone变成了黑客的帮凶。昨天,这名自称为OlegPliss的黑客使用Fin...
- 服务器密码错误被锁定怎么解决(服务器密码失效)
-
#服务器密码错误被锁定解决方案当服务器因多次密码错误导致账户被锁定时,可以按照以下步骤进行排查和解决:##一、确认锁定状态###1.检查账户锁定状态(Linux)```bash#查看账户锁定...
- 凌晨突发的数据库重大故障,我排查了一整天……
-
春节期间过得太热闹了,上班确实没啥状态,这不刚发生的一个重大性能故障,排查了整整一天,后面的领导都站成了一排,本次把故障发生的详细分析过程分享给大家!本次故障发生在凌晨,核心应用卡顿非常严重,Orac...
- Oracle锁表紧急处理!3招快速解锁方案
-
开篇:突发故障现场凌晨1点,某电商系统突然卡顿,数千笔支付订单无法完成——数据库出现死锁,技术团队紧急响应...(遇到类似情况的,欢迎在评论区分享经历)一、问题重现:死锁是如何产生的?典型场景:问题根...
- JetBrains DataGrip Mac中文破解版V2025.1下载安装教程
-
DataGripforMac是由JetBrains开发的数据库集成开发环境(IDE),专为数据库管理员和开发人员设计。它支持多种数据库(如MySQL、PostgreSQL、Oracle、SQ...
- 电脑装安卓系统,安卓X86版5.1 RC1下载
-
日前,谷歌放出了Android-x865.1的第一个候选版本Android-x865.1RC1,该版本基于Android5.1.1r24Lollipop开发,更新包括大量x86(32位)代...
- 来来来!一文告诉你Eclipse的正确安装使用姿势,你都清楚吗?
-
前言本学习笔记是有关如何设置Eclipse的详细说明。即使你天天在使用它,但是,相信我,或许你并不足够了解它。安装Java运行时环境Eclipse是Java应用程序,因此设置Eclipse的第一步是安...
- 分享收藏的 oracle 11.2.0.4各平台的下载地址
-
概述oracle11.2.0.4是目前生产环境用的比较多的版本,同时也是很稳定的一个版本。目前官网上已经找不到下载链接了,有粉丝在头条里要求分享一下下载地址。一、各平台下载地址1.1Linuxx...
你 发表评论:
欢迎- 一周热门
- 最近发表
- 标签列表
-
- oracle位图索引 (63)
- oracle批量插入数据 (62)
- oracle事务隔离级别 (53)
- oracle 空为0 (50)
- oracle主从同步 (55)
- oracle 乐观锁 (51)
- redis 命令 (78)
- php redis (88)
- redis 存储 (66)
- redis 锁 (69)
- 启动 redis (66)
- redis 时间 (56)
- redis 删除 (67)
- redis内存 (57)
- redis并发 (52)
- redis 主从 (69)
- redis 订阅 (51)
- redis 登录 (54)
- redis 面试 (58)
- 阿里 redis (59)
- redis 搭建 (53)
- redis的缓存 (55)
- lua redis (58)
- redis 连接池 (61)
- redis 限流 (51)