Doris详细介绍与使用之查询语法（三）-尧图网站建设

📅 发布时间：2026/6/24 9:14:11

doris的查询语法

1.0 查询语法整体结构

SELECT [ALL | DISTINCT | DISTINCTROW ] -- 对查询字段的结果是否需要去重，还是全部保留等参数 select_expr [, select_expr ...] -- select的查询字段 [FROM table_references [PARTITION partition_list] -- from 哪个库里面的那张表甚至哪一个(几个)分区 [WHERE where_condition] -- WHERE 查询 [GROUP BY {col_name | expr | position} -- group by 聚合 [ASC | DESC], ... [WITH ROLLUP]] [HAVING where_condition] -- having 针对聚合函数的再一次过滤 [ORDER BY {col_name | expr | position} -- 对结果数据按照字段进行排序 [ASC | DESC], ...] -- 排序规则 [LIMIT {[offset,] row_count | row_count OFFSET offset}] -- 限制输出多少行内容 [INTO OUTFILE 'file_name'] -- 将查询的结果导出到文件中

1.1 doris内置函数

1.1.1条件函数

1.1.1.1 if函数

语法示例：

if(boolean condition, type valueTrue, type valueFalseOrNull) --如果表达式 condition 成立，返回结果 valueTrue；否则，返回结果 valueFalseOrNull --返回值类型：valueTrue 表达式结果的类型

mysql> select user_id, if(user_id = 1, "true", "false") as test_if from test; +---------+---------+ | user_id | test_if | +---------+---------+ | 1 | true | | 2 | false | +---------+---------+ select if(1<2,true,false); select if(1<2,100,200);

1.1.1.2 ifnull,nvl,coalesce,nullif函数

语法示例：

ifnull(expr1, expr2) --如果 expr1 的值不为 NULL 则返回 expr1，否则返回 expr2 select ifnull(null,200); select ifnull(100,200); nvl(expr1, expr2) --如果 expr1 的值不为 NULL 则返回 expr1，否则返回 expr2 select nvl(100,200); coalesce(expr1, expr2, ...., expr_n)) --返回参数中的第一个非空表达式（从左向右） select coalesce(100,200,300); select coalesce(null,null,300); nullif(expr1, expr2) -- 如果两个参数相等，则返回NULL。否则返回第一个参数的值 select nullif(100,100); select nullif(100,200);

示例：

# IFNULL、NVL：两个参数，第一个为空就返回第二个。 # COALESCE：多个参数，从左往右返回第一个不为空的值。 -- NULLIF (a,b): 如果 a 和 b 相等就返回 null，不相等就返回 a。 select ifnull(1,0); -- 1 select nvl(null,20);-- 20 select coalesce(null,null,'0000');-- 0000 select coalesce(null,null,null,'0000',null); -- 0000 select coalesce(null,null,2,'0000',null); -- 2 select nullif(1,1); -- null select nullif(1,0); -- 1

1.1.1.3 case

语法示例：

-- 方式一 CASE expression WHEN condition1 THEN result1 [WHEN condition2 THEN result2] ... [WHEN conditionN THEN resultN] [ELSE result] END -- 方式二 CASE WHEN condition1 THEN result1 [WHEN condition2 THEN result2] ... [WHEN conditionN THEN resultN] [ELSE result] END -- 将表达式和多个可能的值进行比较，当匹配时返回相应的结果

示例：

mysql> select user_id, case user_id when 1 then 'user_id = 1' when 2 then 'user_id = 2' else 'user_id not exist' end as test_case from test; +---------+-------------+ | user_id | test_case | +---------+-------------+ | 1 | user_id = 1 | | 2 | user_id = 2 | | 3 | 'user_id not exist' | +---------+-------------+ mysql> select user_id, case when user_id = 1 then 'user_id = 1' when user_id = 2 then 'user_id = 2' else 'user_id not exist' end as test_case from test; +---------+-------------+ | user_id | test_case | +---------+-------------+ | 1 | user_id = 1 | | 2 | user_id = 2 | +---------+-------------+ select *,case sex when 1 then '男' when 0 then '女' end 性别 from test.ex_user; select *,case when sex = 1 then '男' when sex = 0 then '女' end 性别 from test.ex_user; select case when city='北京' then 'BJ' when city ='上海' then 'SH' else 'Other' end 城市 from ex_user; select case city when '北京' then 'BJ' when '上海' then 'SH' else 'Other' end 城市 from ex_user;

1.1.2聚合函数

1.1.2.1 min,max,sum,avg,count

1.1.2.2 min_by和max_by

MAX_BY(expr1, expr2) 返回expr2最大值所在行的 expr1 （求分组top1的简介函数）

语法示例：

CREATE TABLE tbl1 ( k1 INT, k2 INT, k3 INT, k4 INT ); INSERT INTO tbl1 VALUES (0, 3, 2, 100), (1, 2, 3, 4), (4, 3, 2, 2), (3, 4, 2, 1); select * from tbl1; --取k4这个列中的最大值对应的k1这个列的值 -- 先找到k4列中最大的值，是100，返回100所在的行的k1 值。 select max_by(k1,k4) from tbl1;

练习：

-- 在虚拟机上建表 vi score.txt zss,chinese,99 zss,math,89 zss,English,79 lss,chinese,88 lss,math,88 lss,English,22 www,chinese,99 www,math,45 zll,chinese,23 zll,math,88 zll,English,80 www,English,94 在datagrip上建表： -- 建表语句 create table test.score ( name varchar(50), subject varchar(50), score double ) DUPLICATE KEY(name) DISTRIBUTED BY HASH(name) BUCKETS 1 PROPERTIES ( "replication_num" = "1" ); -- 通过本地文件的方式导入数据 curl --location-trusted -u root:123456 \ -H "column_separator:," \ -H "columns:name,subject,score" \ -T /home/doris_data/score.txt \ -XPUT http://hadoop11:8030/api/test/score/_stream_load

-- 求每门课程成绩最高分的那个人

-- 两种写法 SELECT * FROM test.score; with t as( select subject,max(score) maxScore from test.score group by subject ) select name,t.subject,maxScore from t join test.score s on t.subject=s.subject and t.maxScore=s.score; with t as ( select *,max(score) over(partition by subject ) maxscore from test.score ) select * from t where score=maxscore; -- 这个无法处理成绩并列的情况。 select max_by(name,score) from score;

1.1.2.3 group_concat

求：每一个人有考试成绩的所有科目

# 求：每一个人有考试成绩的所有科目 select name,group_concat(subject,',') as all_subject from score group by name; -- hive的写法 select name,concat_ws(",",collect_list(subject)) from test.score group by name

语法示例：

VARCHAR GROUP_CONCAT([DISTINCT] VARCHAR 列名[, VARCHAR sep]
该函数是类似于 sum() 的聚合函数，group_concat 将结果集中的多行结果连接成一个字符串
-- group_concat对于收集的字段只能是string，varchar，char类型
--当不指定分隔符的时候，默认使用 ','
VARCHAR ：代表GROUP_CONCAT函数返回值类型
[DISTINCT]：可选参数，针对需要拼接的列的值进行去重
[, VARCHAR sep]：拼接成字符串的分隔符，默认是 ','

--建表 create table test.example( id int, name varchar(50), age int, gender string, is_marry boolean, marry_date date, marry_datetime datetime )engine = olap distributed by hash(id) buckets 3 PROPERTIES ( "replication_num" = "1" ); --插入数据 insert into example values (1,'zss',18,'male',0,null,null), (2,'lss',28,'female',1,'2022-01-01','2022-01-01 11:11:11'), (3,'ww',38,'male',1,'2022-02-01','2022-02-01 11:11:11'), (4,'zl',48,'female',0,null,null), (5,'tq',58,'male',1,'2022-03-01','2022-03-01 11:11:11'), (6,'mly',18,'male',1,'2022-04-01','2022-04-01 11:11:11'), (7,null,18,'male',1,'2022-05-01','2022-05-01 11:11:11'); --当收集的那一列，有值为null时，他会自动将null的值过滤掉 select gender, group_concat(name,',') as gc_name from example group by gender; +--------+---------------+ | gender | gc_name | +--------+---------------+ | female | zl,lss | | male | zss,ww,tq,mly | +--------+---------------+ 经过测试：hive中的函数也能写： select gender,concat_ws(',',collect_list(name)) from test.example group by gender; select gender,group_concat(distinct age) from example group by gender; +--------+------------+ | gender | gc_age | +--------+------------+ | female | 48, 28 | | male | 58, 38, 18 | +--------+------------+

1.1.2.4 collect_list，collect_set (1.2版本上线)

语法示例：

ARRAY<T> collect_list(expr)
--返回一个包含 expr 中所有元素(不包括NULL)的数组，数组中元素顺序是不确定的。
ARRAY<T> collect_set(expr)
--返回一个包含 expr 中所有去重后元素(不包括NULL)的数组，数组中元素顺序是不确定的。
select gender,collect_list(age) from example group by gender;
select gender,collect_set(age) from example group by gender;

1.1.3日期函数

1.1.3.1 获取当前时间

curdate，current_date，now，curtime，current_time，current_timestamp

示例：

select current_date(); -- 2026-06-23 select curdate(); -- 2026-06-23 select now();-- 2026-06-23 19:17:13 select curtime(); -- 19:17:13 select current_timestamp(); -- 2026-06-23 19:17:13

1.1.3.2last_day(1.2版本上线)

语法：

DATE last_day(DATETIME date) -- 返回输入日期中月份的最后一天； --'28'(非闰年的二月份), --'29'(闰年的二月份), --'30'(四月，六月，九月，十一月), --'31'(一月，三月，五月，七月，八月，十月，十二月) select last_day('2024-12-23'); select last_day('2024-12-23 18:19:36');

1.1.3.3 from_unixtime

语法：将时间戳转换为年月日时分秒

DATETIME FROM_UNIXTIME(INT unix_timestamp[, VARCHAR string_format]) -- 将 unix 时间戳转化为对应的 time 格式，返回的格式由 string_format 指定 --支持date_format中的format格式，默认为 %Y-%m-%d %H:%i:%s -- 正常使用的三种格式 yyyyMMdd yyyy-MM-dd yyyy-MM-dd HH:mm:ss

示例：

select from_unixtime(1196440219); -- 时区 2007-12-01 00:30:19 select from_unixtime(1196440219, 'yyyy-MM-dd HH:mm:ss'); -- 2007-12-01 00:30:19 select from_unixtime(1196440219, '%Y-%m-%d'); -- 2007-12-01

1.1.2.4unix_timestamp

语法

UNIX_TIMESTAMP(),
UNIX_TIMESTAMP(DATETIME date),
UNIX_TIMESTAMP(DATETIME date, STRING fmt) -- 给一个日期，指定这个日期的格式
-- 将日期转换成时间戳，返回值是一个int类型

示例：

-- 获取当前日期的时间戳 select unix_timestamp(); -- 1782213709 -- 获取指定日期的时间戳 select unix_timestamp('2022-11-26 01:09:01'); -- 1669396141 -- 给定一个特殊日期格式的时间戳，指定格式 select unix_timestamp('2022-11-26 01:09-01', '%Y-%m-%d %H:%i-%s'); -- 1669396141.000000 select unix_timestamp('2022年11月26 01:09-01', '%Y年%m月%d %H:%i-%s'); -- 1669396141.000000

1.1.3.5 to_date

语法：

DATE TO_DATE(DATETIME)
--返回 DATETIME 类型中的日期部分。

select to_date("2022-11-20 00:00:00"); -- 2022-11-20 select from_unixtime(unix_timestamp('2022-11-20 00:00:00','%Y-%m-%d %H:%i:%s'),'yyyy-MM-dd'); -- 2022-11-20 select substr("2022-11-20 00:00:00",1,10); -- 2022-11-20

1.1.3.6extract

语法：

extract(unit FROM DATETIME) --抽取
-- 提取DATETIME某个指定单位的值。
--unit单位可以为year, month, day, hour, minute或者second

select extract(year from '2022-09-22 17:01:30') as year, extract(month from '2022-09-22 17:01:30') as month, extract(day from '2022-09-22 17:01:30') as day, extract(hour from '2022-09-22 17:01:30') as hour, extract(minute from '2022-09-22 17:01:30') as minute, extract(second from '2022-09-22 17:01:30') as second;

select year('2022-09-22 17:01:30'); 使用这种方法比上面的简单

1.1.3.7date_add，date_sub,datediff

语法：

DATE_ADD(DATETIME date,INTERVAL expr type) DATE_SUB(DATETIME date,INTERVAL expr type) DATEDIFF(DATETIME expr1,DATETIME expr2) -- 计算两个日期相差多少天，结果精确到天，不考虑时分秒之间的差值。 -- 向日期添加指定的时间间隔。 -- date 参数是合法的日期表达式。 -- expr 参数是您希望添加的时间间隔。 -- type 参数可以是下列值：YEAR, MONTH, DAY, HOUR, MINUTE, SECOND

select date_add('2010-11-30 23:59:59', INTERVAL 2 DAY); +-------------------------------------------------+ | date_add('2010-11-30 23:59:59', INTERVAL 2 DAY) | +-------------------------------------------------+ | 2010-12-02 23:59:59 | +-------------------------------------------------+ --传一个负数进去也就等同于date_sub select date_add('2010-11-30 23:59:59', INTERVAL -2 DAY); +--------------------------------------------------+ | date_add('2010-11-30 23:59:59', INTERVAL -2 DAY) | +--------------------------------------------------+ | 2010-11-28 23:59:59 | +--------------------------------------------------+ mysql> select datediff('2022-11-27 22:51:56','2022-11-24 22:50:56'); +--------------------------------------------------------+ | datediff('2022-11-27 22:51:56', '2022-11-24 22:50:56') | +--------------------------------------------------------+ | 3 | +--------------------------------------------------------+

1.1.3.8 date_format

语法：

VARCHAR DATE_FORMAT(DATETIME date, VARCHAR format)
--将日期类型按照format的类型转化为字符串

select date_format('2007-10-04 22:23:00', '%H:%i:%s'); +------------------------------------------------+ | date_format('2007-10-04 22:23:00', '%H:%i:%s') | +------------------------------------------------+ | 22:23:00 | +------------------------------------------------+ select date_format('2007-10-04 22:23:00', 'yyyy-MM-dd'); +------------------------------------------------+ | date_format('2007-10-04 22:23:00', '%Y-%m-%d') | +------------------------------------------------+ | 2007-10-04 | +------------------------------------------------+ select date_format('2007-10-04 22:23:00', '%Y-%m'); 2007-10

1.1.4 字符串函数

1.1.4.1 length，lower，upper，reverse

示例：

获取到字符串的长度，对字符串转大小写和字符串的反转

1.1.4.2 lpad，rpad（填充数据）

语法：
VARCHAR rpad(VARCHAR str, INT len, VARCHAR pad)
VARCHAR lpad(VARCHAR str, INT len, VARCHAR pad)
-- 返回 str 中长度为 len（从首字母开始算起）的字符串。
--如果 len 大于 str 的长度，则在 str 的后面不断补充 pad 字符，
--直到该字符串的长度达到 len 为止。如果 len 小于 str 的长度，
--该函数相当于截断 str 字符串，只返回长度为 len 的字符串。
--len 指的是字符长度而不是字节长度。

-- 向左边补齐 SELECT lpad("1", 5, "0"); +---------------------+ | lpad("1", 5, "0") | +---------------------+ | 00001 | +---------------------+ -- 向右边补齐 SELECT rpad('11', 5, '0'); +---------------------+ | rpad('11', 5, '0') | +---------------------+ | 11000 | +---------------------+ select lpad("1",5,"0"); select rpad("1",5,"0"); select lpad("abcdefg",5,""); select lpad("abc",5,"#");

1.1.4.3 concat，concat_ws

语法：

select concat("a", "b"); +------------------+ | concat('a', 'b') | +------------------+ | ab | +------------------+ select concat("a", "b", "c"); +-----------------------+ | concat('a', 'b', 'c') | +-----------------------+ | abc | +-----------------------+ -- concat中，如果有一个值为null，那么得到的结果就是null mysql> select concat("a", null, "c"); +------------------------+ | concat('a', NULL, 'c') | +------------------------+ | NULL | +------------------------+ --使用第一个参数 sep 作为连接符 --将第二个参数以及后续所有参数(或ARRAY中的所有字符串)拼接成一个字符串。 -- 如果分隔符是 NULL，返回 NULL。 concat_ws函数不会跳过空字符串，会跳过 NULL 值。 mysql> select concat_ws("_", "a", "b"); +----------------------------+ | concat_ws("_", "a", "b") | +----------------------------+ | a_b | +----------------------------+ mysql> select concat_ws(NULL, "d", "is"); +----------------------------+ | concat_ws(NULL, 'd', 'is') | +----------------------------+ | NULL | +----------------------------+

1.1.4.4 substr（截取）

语法：

--求子字符串，返回第一个参数描述的字符串中从start开始长度为len的部分字符串。 --首字母的下标为1。 mysql> select substr("Hello doris", 2, 1); +-----------------------------+ | substr('Hello doris', 2, 1) | +-----------------------------+ | e | +-----------------------------+ mysql> select substr("Hello doris", 1, 2); +-----------------------------+ | substr('Hello doris', 1, 2) | +-----------------------------+ | He | +-----------------------------+

1.1.4.5 ends_with，starts_with

语法：

BOOLEAN ENDS_WITH (VARCHAR str, VARCHAR suffix)
--如果字符串以指定后缀结尾，返回true。否则，返回false。
--任意参数为NULL，返回NULL。
BOOLEAN STARTS_WITH (VARCHAR str, VARCHAR prefix)
--如果字符串以指定前缀开头，返回true。否则，返回false。
--任意参数为NULL，返回NULL。

示例：

select ends_with("Hello doris", "doris"); -- 1 select ends_with("Hello doris", "Hello");-- 0 select starts_with("hello world","hello");-- 1 select starts_with("hello world","world");-- 0

5.1.4.6 trim，ltrim，rtrim

语法：

VARCHAR trim(VARCHAR str) -- 将参数 str 中左侧和右侧开始部分连续出现的空格去掉 mysql> SELECT trim(' ab d ') str; +------+ | str | +------+ | ab d | +------+ VARCHAR ltrim(VARCHAR str) -- 将参数 str 中从左侧部分开始部分连续出现的空格去掉 mysql> SELECT ltrim(' ab d') str; +------+ | str | +------+ | ab d | +------+ VARCHAR rtrim(VARCHAR str) --将参数 str 中从右侧部分开始部分连续出现的空格去掉 mysql> SELECT rtrim('ab d ') str; +------+ | str | +------+ | ab d | +------+

1.1.4.7 null_or_empty，not_null_or_empty

select null_or_empty(null); -- 1 select null_or_empty("");-- 1 select not_null_or_empty("a");-- 1

1.1.4.8 replace

VARCHAR REPLACE (VARCHAR str, VARCHAR old, VARCHAR new) -- 将str字符串中的old子串全部替换为new串 mysql> select replace("http://www.baidu.com:9090", "9090", ""); +------------------------------------------------------+ | replace('http://www.baidu.com:9090', '9090', '') | +------------------------------------------------------+ | http://www.baidu.com: | +------------------------------------------------------+

1.1.4.9 split_part

VARCHAR split_part(VARCHAR content, VARCHAR delimiter, INT field) -- 根据分割符拆分字符串, 返回指定的分割部分(从1开始计数)。 mysql> select split_part("hello world", " ", 1); +----------------------------------+ | split_part('hello world', ' ', 1) | +----------------------------------+ | hello | +----------------------------------+ mysql> select split_part("hello world", " ", 2); +----------------------------------+ | split_part('hello world', ' ', 2) | +----------------------------------+ | world | +----------------------------------+ mysql> select split_part("2019年7月8号", "月", 1); +-----------------------------------------+ | split_part('2019年7月8号', '月', 1) | +-----------------------------------------+ | 2019年7 | +-----------------------------------------+ mysql> select split_part("abca", "a", 1); +----------------------------+ | split_part('abca', 'a', 1) | +----------------------------+ | | +----------------------------+

1.1.4.10 money_format

VARCHAR money_format(Number) -- 将数字按照货币格式输出，整数部分每隔3位用逗号分隔，小数部分保留2位 select money_format(17014116);-- 17,014,116.00 select money_format(1123.456); -- 1,123.46 select money_format(1123.4);-- 1,123.40