记一次excel操作的代码

前言

应导师要求,需要处理一批excel数据,数据不方便透露,因此以下代码对各位读者应该是没什么用的,这也是为什么我将这篇文章分区至 念念碎 的原因。代码编写历时约5h(没办法,本人太菜了),使用工具为matlab,由于数据有缺失,凡是缺失的地方均赋值-10000,以方便做后期处理

现分享如下

需求1

这里的数据值缺失的少,故采取人工赋值的方式修复了excel,以便继续运行代码

首先定义了一个类fun.m

% time => 2021/11/26
% 北京 => 北京 => 54511 => 1
% 广东 => 深圳 => 59493 => 2
% 江苏 => 无锡 => 58354 => 3
% 新疆 => 乌鲁木齐 => 51469 => 4
classdef fun
    methods(Static)
        % 方法 read_Files_name
        % 输入 Path 路径 suffix 后缀名
        % 返回 FileNames 符合后缀名的文件名string数组,支持空后缀
        function FileNames = read_Files_name(Path,suffix)
            if isempty(suffix) % 如果没有后缀
                remove_arg = [];
                File = dir(fullfile(Path,suffix));  % 显示文件夹下所有符合后缀名的完整信息
                FileNames = {File.name}';           % 提取文件名,转换为n行1列
                for i=1:length(FileNames(:))
                    if ~contains(FileNames(i),'.')==0
                        remove_arg(end+1)=i;
                    end
                end
                FileNames(remove_arg)=[];
                FileNames = string(FileNames);
            else % 如果有后缀
                File = dir(fullfile(Path,suffix));  % 显示文件夹下所有符合后缀名的完整信息
                FileNames = {File.name}';           % 提取文件名,转换为n行1列
                FileNames = string(FileNames);
            end
        end

        % 方法 find_PATH
        % 输入 void
        % 返回 Path 当前脚本文件所在的路径
        function Path = find_PATH()
            fullpath = mfilename('fullpath');
            [Path,~]=fileparts(fullpath);
        end

        % 方法 joint_Path
        % 输入 Path_father 父路径 FileNames_father 拼接的字符串数组
        % 返回 Path 拼接后的路径
        function Path = joint_Path(Path_father, FileNames_father)
            for i = 1:length(FileNames_father(:))
                Path(i,:) = string(Path_father) + '\' + FileNames_father(i);
            end
            Path = char(Path);
        end

        % 需要对应 Path_son 制作相应数组存储
        % 这样做的目的是防止某个文件夹数据缺少
        % Path_son_str = string(Path_son);
        % for i=1:length(Path_son_str(:))
        %     FileNames_son = fun.read_Files_name(Path_son(i,:),suffix);
        %     eval(['TemporaryVariable_' , num2str(i), '=','FileNames_son;']);
        % end

        % 方法 data_sorting
        % 输入 太多了懒得写了
        % 返回 DATA 整理后的data
        function DATA = data_sorting(city_name,temporary_data,need_data,num_ID,num_column)
            for i = 1:size(need_data,1)/num_ID
                data_beijing(i,:) = need_data((i - 1) * 4 + 1, num_column - 2);
                data_shenzhen(i,:) = need_data((i - 1) * 4 + 2, num_column - 2);
                data_wuxi(i,:) = need_data((i - 1) * 4 + 3, num_column - 2);
                data_wulumuqi(i,:) = need_data((i - 1) * 4 + 4, num_column - 2);
            end
            data_row_in = [data_beijing,data_shenzhen,data_wuxi,data_wulumuqi];
            data_row_in = string(data_row_in);
            data_row_in = [string(city_name)'; data_row_in];
            for i = 1:size(need_data,1)/num_ID
                date_data(i,:) = temporary_data((i - 1) * 4 + 2, 1);
            end
            date_data = [temporary_data(1, num_column); date_data];
            DATA = [date_data, data_row_in];
        end

        % 方法 data_sorting
        % 输入 num_column,DATA_ALL,need_data,num_ID
        % 返回 DATA 整理后的data
        function DATA = data_city_sorting(num_column,DATA_ALL,need_data,num_ID)
            for i = 1:size(need_data,2)
                data_title(i) = DATA_ALL(1,(num_ID+1)*(i-1)+1);
                DATA(:,i) = DATA_ALL(:,num_column+5*(i-1));
            end
            DATA(1,:) = data_title;
            DATA = [DATA_ALL(:,1),DATA];
            DATA(1,1) = '日期';
        end

    end
end

主函数分两步走

main_1.m

clc,clear % 清空控制台
% 时间度过大,故先初步整理数据
% 参数设置
suffix_none = '';
suffix = '*.xls';
range = 'D1:D4';
range_rain = 'A1:B4';
num_ID = 4;
city_name = char('北京','深圳','无锡','乌鲁木齐');
% 获取 .m 文件所在 位置
Path_father = fun.find_PATH();

% 获取 位置 中 文件夹名字
FileNames_father = fun.read_Files_name(Path_father,suffix_none);

% 获取 各文件夹 中 符合后缀的文件名字
Path_son = fun.joint_Path(Path_father, FileNames_father);

% 数组存储excel文件名
% 很幸运,我们得到的数据是完整的,所以我们这么做,否则参见 fun.m 的注释
Path_son_str = string(Path_son);
TemporaryVariable = fun.read_Files_name(Path_son(1,:),suffix);

% 糟糕的是,降雨量这个数据格式不一样的,需要特殊处理,首先找出它所在的索引
for rainfall_where = 1:size(Path_son)
    if ~contains(Path_son_str(rainfall_where),'降雨') == 0
        break
    end
end

% for 读取 excel
% 这样做可以防止二进制文件名排序问题,参见数据结构,很幸运我们这里没有出现这种问题
% 注意,如果后期发现各个城市的排序不一致需要更改代码,通过find会导致for循环需要乘4个以上的时间度
need_data = [];
for j = 1:size(FileNames_father ,1)
    temporary_arg = [];
    for i = 1: length(TemporaryVariable(:))
        %     for i = 1: 3
        if j ~= rainfall_where
            % strrep 以去除字符串数组中的空格,防止出错,第二个''一定要记得空一格
            [temporary_data, ~, ~] =...
                xlsread(strrep(Path_son_str(j), ' ', '') + '\' + TemporaryVariable(i),'',range);
            temporary_arg = [temporary_arg;temporary_data];
            if size(temporary_data,1) ~=4
                disp(i);
                error(strrep(Path_son_str(j), ' ', '') + '\' + TemporaryVariable(i) + '格式不符合要求');
            end
        else
            try
                [~,~,temporary_data] =...
                    xlsread(strrep(Path_son_str(j), ' ', '') + '\' + TemporaryVariable(i),'',range_rain);
                % matlab 没有三元运算符,挺麻烦的这里
                for rain_i = 1:num_ID
                    if isempty(find(strcmp(temporary_data,strrep(city_name(rain_i,:),' ', '')), 1)) == 0
                        rain_idx(rain_i) =...
                            find(strcmp(temporary_data,strrep(city_name(rain_i,:),' ', '')));
                    else
                        rain_idx(rain_i) = -1;
                    end
                end
                for rain_i = 1:num_ID
                    if rain_idx(rain_i) ~= -1
                        rain_idx_arg(rain_i)  = cell2mat(temporary_data(rain_idx(rain_i),2));
                    else
                        rain_idx_arg(rain_i)  = 0;
                    end
                end
                temporary_rain_data = rain_idx_arg';
            catch
                temporary_rain_data = [-10000;-10000;-10000;-10000];
            end
            temporary_arg = [temporary_arg;temporary_rain_data];
        end
    end
    need_data = [need_data,temporary_arg];
end

% 为每个数组首列 添加 日期
j = 1;jj = 1;
for i = 1:size(need_data,1)
    date_notFORAT = char(TemporaryVariable(j));
    % 取文件名前 8 位
    date_notFORAT = str2num(string(date_notFORAT(1:8)));
    % 格式化日期
    date_FORAT = datetime(date_notFORAT,'ConvertFrom','yyyymmdd','format','yyyy-MM-dd');
    FORAT_date_data(i,:) = [string(date_FORAT),city_name(jj,:),need_data(i,:)];
    jj = jj + 1;
    if mod(i,num_ID) == 0
        j = j + 1;
        jj = 1;
    end
end

% 为每个数组首行 添加 标题
data_name = ["日期","城市"];
for i = 1:size(FileNames_father,1)
    data_name = [data_name ,FileNames_father(i)];
end
FORAT_title_data = [data_name;FORAT_date_data];

main_2.m

clc,clear;
range_result = 'A1:J1461';
num_ID = 4;
city_name = char('北京','深圳','无锡','乌鲁木齐');
[~, ~,temporary_data] = xlsread('result.xlsx','',range_result);
need_data = temporary_data(2:1461,3:10);
% 平均气温
num_column = 3;
DATA_average_temperature = fun.data_sorting(city_name,temporary_data,need_data,num_ID,num_column);
% 日照时间
num_column = 4;
DATA_sunshine_time = fun.data_sorting(city_name,temporary_data,need_data,num_ID,num_column);
% 最低气温
num_column = 5;
DATA_lowest_temperature = fun.data_sorting(city_name,temporary_data,need_data,num_ID,num_column);
% 最高气温
num_column = 6;
DATA_highest_temperature = fun.data_sorting(city_name,temporary_data,need_data,num_ID,num_column);
% 气压
num_column = 7;
DATA_Air_pressure = fun.data_sorting(city_name,temporary_data,need_data,num_ID,num_column);
% 湿度
num_column = 8;
DATA_humidity = fun.data_sorting(city_name,temporary_data,need_data,num_ID,num_column);
% 降雨
num_column = 9;
DATA_rainfall = fun.data_sorting(city_name,temporary_data,need_data,num_ID,num_column);
% 风速
num_column = 10;
DATA_wind_peed = fun.data_sorting(city_name,temporary_data,need_data,num_ID,num_column);
% 汇总
DATA_ALL = [DATA_average_temperature,DATA_sunshine_time, DATA_lowest_temperature,...
    DATA_highest_temperature, DATA_Air_pressure, DATA_humidity, DATA_rainfall, DATA_wind_peed];
% 北京
num_column = 2;
DATA_Beijing = fun.data_city_sorting(num_column,DATA_ALL,need_data,num_ID);
% 深圳
num_column = 3;
DATA_shenzhen = fun.data_city_sorting(num_column,DATA_ALL,need_data,num_ID);
% 无锡
num_column = 4;
DATA_wuxi = fun.data_city_sorting(num_column,DATA_ALL,need_data,num_ID);
% 乌鲁木齐
num_column = 5;
DATA_wulumuqi = fun.data_city_sorting(num_column,DATA_ALL,need_data,num_ID);

需求2

同需求1一样的步骤,首先定义了一个类fun.m

% time => 2021/11/27
classdef fun
    methods(Static)
        % 方法 read_Files_name
        % 输入 Path 路径 suffix 后缀名
        % 返回 FileNames 符合后缀名的文件名string数组,支持空后缀
        function FileNames = read_Files_name(Path,suffix)
            if isempty(suffix) % 如果没有后缀
                remove_arg = [];
                File = dir(fullfile(Path,suffix));  % 显示文件夹下所有符合后缀名的完整信息
                FileNames = {File.name}';           % 提取文件名,转换为n行1列
                for i=1:length(FileNames(:))
                    if ~contains(FileNames(i),'.')==0
                        remove_arg(end+1)=i;
                    end
                end
                FileNames(remove_arg)=[];
                FileNames = string(FileNames);
            else % 如果有后缀
                File = dir(fullfile(Path,suffix));  % 显示文件夹下所有符合后缀名的完整信息
                FileNames = {File.name}';           % 提取文件名,转换为n行1列
                FileNames = string(FileNames);
            end
        end

        % 方法 find_PATH
        % 输入 void
        % 返回 Path 当前脚本文件所在的路径
        function Path = find_PATH()
            fullpath = mfilename('fullpath');
            [Path,~]=fileparts(fullpath);
        end

        % 方法 joint_Path
        % 输入 Path_father 父路径 FileNames_father 拼接的字符串数组
        % 返回 Path 拼接后的路径
        function Path = joint_Path(Path_father, FileNames_father)
            for i = 1:length(FileNames_father(:))
                Path(i,:) = string(Path_father) + '\' + FileNames_father(i);
            end
            Path = char(Path);
        end

        % 方法 remove_an_end
        % 输入 TemporaryVariable 文件名
        % 返回 TemporaryVariable_format 删除后缀的文件名
        function TemporaryVariable_format_data = remove_an_end(TemporaryVariable)
            TemporaryVariable_format_data = [];
            TemporaryVariable_char = char(TemporaryVariable);
            for i = 1:size(TemporaryVariable,1)
                index_an_end = strfind(TemporaryVariable_char(i,:),'.');
                TemporaryVariable_format(i,:) = string(TemporaryVariable_char(i,1:index_an_end-1));
            end
            for i = 1:size(TemporaryVariable_format,1)
                temp = [];
                temp = char(TemporaryVariable_format(i));
                if temp(2) ~= '$'
                    TemporaryVariable_format_data = [TemporaryVariable_format_data;TemporaryVariable_format(i)];
                end
            end
        end

        % 方法 format_data_temporary_data
        % 输入 num_column,times_day,temporary_data,need_data,date_data,date_title
        % 返回 DATA 格式化data
        function DATA = format_data_temporary_data(num_column,times_day,temporary_data,need_data,date_data,date_title)
            data_temp_where = string(temporary_data(1,num_column));
            data_temp_what = string(temporary_data(2,num_column));
            for i = 1:(size(temporary_data,1)-2)/times_day
                data_temp_row = need_data(1+times_day*(i-1):1+times_day*(i-1)+times_day-1,num_column-1);
                % 以 行向量 存储
                data_temp(i,:) = data_temp_row';
                % 求最小最大和平均
                data_temp_min = string(min(str2double(data_temp(i,:))));
                data_temp_max = string(max(str2double(data_temp(i,:))));
                data_temp_mean = string(mean(str2double(data_temp(i,:))));
                % 整理
                data_temp_format(i,:) = [data_temp(i,:), data_temp_min, data_temp_max, data_temp_mean];
                data_temp_where_arg(i,:) = data_temp_where;
                data_temp_what_arg(i,:) = data_temp_what;
            end
            DATA = [date_title ;
                data_temp_where_arg,data_temp_what_arg, date_data, data_temp_format];
        end
    end
end

接下来,调用类。写代码的时候,发现部分数据有缺且却的特别多,故用try语句进行计算机赋值-10000处理。降雨这个数据文件同其他文件格式不一致,必须对它做特别处理

整个流程同样分两步走

main_1.m

clc,clear % 清空控制台
% 时间度过大,故先初步整理数据
% 参数设置
suffix_none = '';
suffix = '*.xlsx';
sheet = 'Sheet1';
range = 'A2:C17522';

% 获取 .m 文件所在 位置
Path_father = fun.find_PATH();

% 获取 位置 中 文件夹名字
FileNames_father = fun.read_Files_name(Path_father,suffix_none);

% 获取 各文件夹 中 符合后缀的文件名字
Path_son = fun.joint_Path(Path_father, FileNames_father);
Path_son_str = string(Path_son);
TemporaryVariable = fun.read_Files_name(Path_son(1,:),suffix);
TemporaryVariable_format = fun.remove_an_end(TemporaryVariable);

% 整理所有文件
need_data = [];
for i = 1:size(TemporaryVariable_format,1)
    temporary_data =[];
    [~, ~, temporary_data] =...
        xlsread(strrep(Path_son_str, ' ', '') + '\' + TemporaryVariable_format(i),sheet,range);
    need_data_title = repmat( TemporaryVariable_format(i), 1, size(temporary_data,2));
    temporary_data = [need_data_title; temporary_data];
    need_data = [ need_data, temporary_data];
end
need_data;

main_2.m

clc,clear % 清空控制台
% 参数设置
sheet = 'Sheet1';
range = 'A1:AG17522';
times_day = 48;

% 读取 main_1 保存 need_data 的 excel
[~, ~, temporary_data] = xlsread('first_result.xlsx',sheet,range);
need_data = temporary_data(3:end, 2:end);

% 整理与生成日期序列
for i = 1:(size(temporary_data,1)-2)/times_day
    date_data(i,:) = temporary_data(3+times_day*(i-1),1);
end
date_hours = [hours(0.5):hours(0.5):days(1)];
date_title = ["where","what","times",string(date_hours),"min","max","average"];

% 格式化数据并保存
for i = 2:size(temporary_data,2)
    DATA = [];
    if i>3 && ( mod(i-1,3) == 0 )
        continue;
    end
    num_column = i;
    DATA = fun.format_data_temporary_data(num_column,times_day,temporary_data,need_data,date_data,date_title);
    excel_sheet_add = char(DATA(2,2)+DATA(2,1));
    excel_sheet_add_remove_idx_1 = strfind(excel_sheet_add,'(');
    excel_sheet_add_remove_idx_2 = strfind(excel_sheet_add,')');
    excel_sheet_add_remove_idx_3 = strfind(excel_sheet_add,'2');
    excel_sheet_add_format = [
        excel_sheet_add(1 : excel_sheet_add_remove_idx_1-2),...
        '_',excel_sheet_add(excel_sheet_add_remove_idx_2+1 : excel_sheet_add_remove_idx_3(1)-1)
    ];
    % xlswrite('Result.xlsx',cellstr(DATA),excel_sheet_add_format);
end

暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇