Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #16

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open

Dev #16

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ npm-debug.log*
yarn-debug.log*
yarn-error.log*

#conf
backend/secrets.py
# Editor directories and files
.idea
.vscode
Expand Down
2 changes: 1 addition & 1 deletion _data/foo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by Administrator at 2020/4/14 23:18
# Created by imoyao at 2020/4/14 23:18
# // f for fontColor / d for dark / b for bright
# // c for color series
# // r red ;b black ;w white ;p purple ;c cyan ;g green ;y yellow ;
Expand Down
Binary file added _data/lipsticks/999_metal.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/999_zirun.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/Dior/lylj/127266.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/Dior/lylj/127267.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/Dior/lylj/127268.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/Dior/lylj/127269.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/Dior/lylj/127270.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/Dior/lylj/127271.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/Dior/lylj/127272.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/big-jpg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/lipsticks/many.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/src/mp.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/src/mp_test.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/src/test.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _data/src/test_data.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7,522 changes: 7,522 additions & 0 deletions _data/xiji/products.html

Large diffs are not rendered by default.

40 changes: 7 additions & 33 deletions backend/core/color_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@
import re
from collections import defaultdict
from functools import wraps, partial
import itertools
import json

import yaml
from pypinyin import lazy_pinyin, Style
from backend.libs.zhtools.langconv import Converter

from backend import settings
from backend import settings, utils


class ConvertColor:
Expand Down Expand Up @@ -237,8 +236,8 @@ def parse_lipstick(self):
brand_list = []
for brand in lipstick_data.get('brands'):
series_list = []
brand_zh_name = brand.get('name','')
brand_en_name = brand.get('en_name','')
brand_zh_name = brand.get('name', '')
brand_en_name = brand.get('en_name', '')
if not brand_en_name:
brand_en_name = '_'.join(lazy_pinyin(brand_zh_name))
series_name = brand.get('series')
Expand Down Expand Up @@ -298,7 +297,7 @@ def parse_nippon_color(self, group_data=False, dump_data=False, group_by='color_
color_obj = self.color_object_maker(name, color_hex, color_rgb=color_rgb, pinyin_str=jp_pinyin_str,
color_cmyk=color_cmyk, is_simple=False)
jp_list.append(color_obj)
all_in_one = merge_iterables_of_dict('id', nippor_list, jp_list)
all_in_one = utils.merge_iterables_of_dict('id', nippor_list, jp_list)
if dump_data:
grouped_data = []
if group_data: # 此处只在导出前分组,没有对各组数据分别分组
Expand Down Expand Up @@ -442,7 +441,7 @@ def all_in_one(self, setting_obj, group_data=False, dump_data=False, group_by='c
colors_data = self.parse_flinhong(settings.FLINHONG_COLORS_INFO, dump_data=dump_data)
cfs_color_data = self.parse_cfs_color(settings.CFS_COLOR_INFO, dump_data=dump_data)
# chinese_colors_data 放后面,因为有描述和图片
all_in_one = merge_iterables_of_dict('id', jizhi_data, colors_data, cfs_color_data, chinese_colors_data)
all_in_one = utils.merge_iterables_of_dict('id', jizhi_data, colors_data, cfs_color_data, chinese_colors_data)
# print(type(all_in_one), all_in_one)
print('before_filter:', len(jizhi_data) + len(chinese_colors_data) + len(colors_data) + len(cfs_color_data))
print('after_filter:', len(all_in_one))
Expand Down Expand Up @@ -491,24 +490,6 @@ def group_iterables_of_dicts_in_list(group_key, iterables):
return row_by_key


def merge_iterables_of_dict(shared_key, *iterables):
    """
    Merge dicts coming from several iterables by the value of ``shared_key``.

    Dicts are consumed in argument order and folded together with
    ``dict.update``, so when two dicts with the same ``shared_key`` value
    disagree on a field, the dict from the LATER iterable wins. Pass the
    source whose fields should take precedence last.

    The input dicts are not modified; each merged entry is a new dict.

    see also: [PyTricks | How to merge lists of dicts in Python](https://imoyao.github.io/blog/2020-04-19/python-merge-two-list-of-dicts/)

    :param shared_key: key that every dict must contain; its value identifies
        which dicts belong together.
    :param iterables: any number of iterables of dicts.
    :return: list of merged dicts, ordered by first appearance of each
        ``shared_key`` value.
    :raises KeyError: if a dict does not contain ``shared_key``.
    """
    merged = defaultdict(dict)
    for dictionary in itertools.chain.from_iterable(iterables):
        merged[dictionary[shared_key]].update(dictionary)
    # Return a real list: a dict_values view is not JSON serializable.
    return list(merged.values())


def update_by_value(v):
"""
根据 V 值去更新色系数据
Expand Down Expand Up @@ -570,7 +551,7 @@ def set_color_name(new_name):
converter = ConvertColor()


@find_color_series_by_name(name='')
# @find_color_series_by_name(name='')
def find_color_series(rgb_seq): # TODO:此处是否有更好实现?cmyk去判断是否是100%cmy颜色(黑色不判断)
"""
TODO: see also: https://github.com/MisanthropicBit/colorise/blob/master/colorise/color_tools.py
Expand Down Expand Up @@ -659,14 +640,7 @@ def unify_color_dict(color):

if __name__ == '__main__':

color_list = settings.COLOR_BASE_MAP.values()
color_list = [[22, 24, 35], [36, 134, 185], [234, 137, 88], [32, 161, 98], [100, 106, 88]]

for item in color_list:
print(find_color_series(item))
# import colorsys
# print(colorsys.rgb_to_hsv(*item))
# print('rgb_to_hsv:', rgb_to_hsv(item))
# print('rgb_to_hsv_org:', rgb_to_hsv_org(item))
print('------------------')
a = [100, 106, 88]
print(find_color_series(a))
106 changes: 106 additions & 0 deletions backend/core/img_ocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by imoyao at 2020/5/1 22:49
import time
from functools import wraps

from PIL import Image
from backend.libs.baidu_api.aip import AipOcr

from backend import secrets, settings


class BaiduOCR:
    """
    Small convenience wrapper around the Baidu AIP OCR client.

    The client is created from the credentials in ``backend.secrets``.
    Every recognition method takes a ``set_option`` flag: when true, the
    request is sent with ``self.options``; otherwise no options are sent.
    """

    def __init__(self):
        self.client = AipOcr(secrets.APP_ID, secrets.API_KEY, secrets.SECRET_KEY)
        # Request options used when a caller passes set_option=True.
        self.options = {"language_type": "CHN_ENG", "detect_direction": "true", "detect_language": "true",
                        "probability": "true"}

    @staticmethod
    def get_file_content(file_path):
        """
        Read an image file.

        :param file_path: path of the file to read.
        :return: the raw bytes of the file.
        """
        with open(file_path, 'rb') as fp:
            return fp.read()

    def _resolve_options(self, set_option):
        """Return a copy of ``self.options`` when requested, else ``None``."""
        # A copy keeps the shared defaults safe from callee-side mutation.
        return dict(self.options) if set_option else None

    def basic_parse(self, fp, set_option=False):
        """
        Run ``basicGeneral`` recognition on a local image file.

        :param fp: path of the image file.
        :param set_option: send ``self.options`` with the request when true.
        :return: whatever the client returns for the request.
        """
        image = self.get_file_content(fp)
        return self.client.basicGeneral(image, options=self._resolve_options(set_option))

    def basic_accurate(self, fp, set_option=False):
        """
        Run ``basicAccurate`` recognition on a local image file.

        :param fp: path of the image file.
        :param set_option: send ``self.options`` with the request when true.
        :return: whatever the client returns for the request.
        """
        image = self.get_file_content(fp)
        return self.client.basicAccurate(image, options=self._resolve_options(set_option))

    def basic_parse_url(self, url, set_option=False):
        """
        Run ``basicGeneralUrl`` recognition on an image given by URL.

        :param url: URL of the image.
        :param set_option: send ``self.options`` with the request when true.
        :return: whatever the client returns for the request.
        """
        return self.client.basicGeneralUrl(url, options=self._resolve_options(set_option))

    def main(self, fp):
        """
        Recognize a local image with ``basic_parse`` and no extra options.

        :param fp: path of the image file.
        :return: the result of ``basic_parse(fp)``.
        """
        return self.basic_parse(fp)


# Module-level client instance, built at import time. BaiduOCR.__init__ reads
# its credentials from backend.secrets, so that module must be importable.
baidu_ocr = BaiduOCR()


def time_it(func):
    """
    Decorator that measures how long a call to ``func`` takes.

    NOTE: the decorated function no longer returns its own result directly.
    It returns a 3-tuple ``(func.__name__, result, elapsed_seconds)``.

    :param func: the callable to time.
    :return: the wrapping callable.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        start_time = time.perf_counter()
        res = func(*args, **kwargs)
        cost_time = time.perf_counter() - start_time
        return func.__name__, res, cost_time

    return wrapper


# @time_it
def get_rgb_of_img_getpixel(fp, xy=(96, 720)):
    """
    Read the colour of a single pixel with ``Image.getpixel``.

    The article at https://www.cnblogs.com/chimeiwangliang/p/7130434.html
    claims this approach is slower than the ``load()``-based one used by
    ``get_rgb_of_img_load``; local testing showed the opposite, so this needs
    further verification.

    :param fp: path (or file object) of the image to open.
    :param xy: ``(x, y)`` coordinate of the pixel to sample; defaults to the
        previously hard-coded ``(96, 720)``.
    :return: the pixel value as returned by ``Image.getpixel``.
    """
    # The context manager closes the underlying file once the pixel is read.
    with Image.open(fp) as img:
        return img.getpixel(xy)


# @time_it
def get_rgb_of_img_load(fp, xy=(96, 720)):
    """
    Read the colour of a single pixel through the object returned by
    ``Image.load``.

    :param fp: path (or file object) of the image to open; can be many
        different formats.
    :param xy: ``(x, y)`` coordinate of the pixel to sample; defaults to the
        previously hard-coded ``(96, 720)``.
    :return: the pixel value found at ``xy``.
    """
    # The context manager closes the underlying file once the pixel is read.
    with Image.open(fp) as im:
        pix = im.load()
        # im.size would give the width and height of the image for iterating.
        # pix[x, y] = value would set the pixel value (tuple), and
        # im.save('alive_parrot.png') would store the modified image.
        return pix[xy]


if __name__ == '__main__':
    # Manual smoke test: run accurate OCR on one sample chart and print the
    # raw response. The relative path assumes the script is started from its
    # own directory.
    # fp = '../../_data/lipsticks/many.png'
    fp = '../../_data/lipsticks/big-jpg.png'
    # Four sample responses for a lipstick swatch chart used to be pasted here
    # as a bare string literal. Which API call produced which response was
    # not recorded. Summary of what they showed:
    #   * the runs reported 24, 24, 26 and 24 text regions respectively;
    #   * run 1 (with per-word probabilities) often misread the '#' after a
    #     shade number as another character or dropped it ('01#' vs. '08');
    #   * runs 2-4 read the shade numbers consistently ('01#', '04#', ...);
    #   * the English shade names were garbled in every run
    #     (e.g. 'SPANISH PO NDWN ROSE FLAMINGO TRUE CORAL SCARLET ROUC').
    # Reuse the module-level client instead of building a second one.
    ret = baidu_ocr.basic_accurate(fp)
    print(ret)
    # for file in settings.TEST_IMAGE_FP:
    #     # print(file)
    #     colors = get_rgb_of_img_load(file)
    #     ano_color = get_rgb_of_img_getpixel(file)
    #     print(colors, ano_color)
102 changes: 102 additions & 0 deletions backend/core/xiji_parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by Administrator at 2020/5/8 23:24
import os
import re

from backend.core import img_ocr, xiji_spider
from backend import settings, utils
# from backend.core.img_ocr import time_it

# Spider instance created at import time.
# NOTE(review): nothing visible in this module reads this name --
# get_lylj_series builds its own local instance. Confirm no external importer
# relies on it before removing.
lylj = xiji_spider.LYLJ()


# @time_it
def get_item_detail(dir_p):
    """
    Build one record for every file found under ``dir_p``.

    For each file the function:

    1. samples a pixel colour with ``img_ocr.get_rgb_of_img_load``;
    2. runs OCR on the image with ``img_ocr.baidu_ocr.basic_accurate``;
    3. scans the recognized lines: a line that contains ``(`` and matches
       ``settings.REG_LYLJ_SUBTITLE_EXP`` supplies the subtitle (group 1), and
       all lines after it are concatenated into the description;
    4. assembles the record.

    :param dir_p: directory to walk recursively.
    :return: list of dicts with the keys ``rgb`` and ``id`` (the file name up
        to its first dot) plus, when a subtitle line was recognized,
        ``subtile`` and ``desc``.
    """
    all_li = []
    for root, _dirs, files in os.walk(dir_p):
        for f in files:
            fp = os.path.join(root, f)
            only_name = f.split('.')[0]
            rgb = img_ocr.get_rgb_of_img_load(fp)  # 1
            ocr_text = img_ocr.baidu_ocr.basic_accurate(fp)  # 2

            # 3. A response without 'words_result' (presumably an API error
            # payload) is treated as "no text found" instead of crashing.
            words_result = ocr_text.get('words_result') or []
            ocr_obj = {}
            for index, word in enumerate(words_result):
                subtitle = word.get('words', '')
                if '(' not in subtitle:
                    continue
                re_ret = re.match(settings.REG_LYLJ_SUBTITLE_EXP, subtitle)
                if re_ret:
                    # The key is spelled 'subtile' (sic); it is kept because
                    # it is part of the returned record format.
                    ocr_obj['subtile'] = re_ret.group(1)
                    ocr_obj['desc'] = ''.join(
                        line.get('words', '') for line in words_result[index + 1:]
                    )

            lipsticks_obj = {  # 4
                'rgb': rgb,
                'id': only_name,
            }
            lipsticks_obj.update(ocr_obj)
            all_li.append(lipsticks_obj)
    return all_li


# @time_it
def get_lylj_series():
    """
    Collect the full data set for the LYLJ lipstick series.

    1. Fetch the item ids, names and image sources through
       ``xiji_spider.LYLJ().load_content()``.
    2. Extract colour and OCR details from the images under
       ``settings.LYLJ_IMG_DIR`` via ``get_item_detail``.
    3. Merge both data sets, using ``id`` as the shared key.

    Example of one returned entry (OCR texts abbreviated)::

        {'id': '127266', 'name': '#520',
         'src': 'https://img0.xiji.com/images/19/01/5b96671ae769403827ec84b8200805abb36a40a0.jpg?1577773530#w',
         'rgb': (247, 0, 83), 'subtile': '<subtitle text>', 'desc': '<description text>'}

    :return: list of merged dicts, one per item id.
    """
    spider = xiji_spider.LYLJ()
    ids, names, srcs = spider.load_content()
    print('Get data from network success!')

    scraped = [
        {'id': item_id, 'name': name, 'src': src}
        for item_id, name, src in zip(ids, names, srcs)
    ]
    ocr_details = get_item_detail(settings.LYLJ_IMG_DIR)

    return utils.merge_iterables_of_dict('id', scraped, ocr_details)


if __name__ == '__main__':
    # Manual run: build the merged LYLJ series data and print it.
    # ret = get_item_detail(settings.LYLJ_IMG_DIR)
    print(get_lylj_series())
Loading