I want to convert Unix timestamps to datetime strings.
Here is my JSON input:
{
"preMarket_timestamp": 1646830062,
"regularMarket_timestamp": 1646773204,
"earningsTimestamp": 1643301000,
"earningsTimestampStart": 1651003200,
"earningsTimestampEnd": 1651521600
}
The JSON result I want:
{
"preMarket_timestamp": "2022/03/09 16:09:26",
"regularMarket_timestamp": "2022/03/09 00:00:04",
"earningsTimestamp": "2022/01/27 19:30:00",
"earningsTimestampStart": "2022/04/26 23:00:00",
"earningsTimestampEnd": "2022/05/02 23:00:00"
}
Is there an operation to do this conversion, or can I do it with NiFi Expression Language? I'm stuck here.
The date does not have to be separated with "/"; "-" is also fine.
Use a ScriptedTransformRecord processor:
Record Reader: JsonTreeReader
Record Writer: JsonRecordSetWriter
Script Language: Groovy
Script Body:
import java.text.SimpleDateFormat
import java.util.Date
import java.util.TimeZone

// Format epoch-second fields as "yyyy/MM/dd HH:mm:ss" in the Europe/Istanbul time zone
def formatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
formatter.setTimeZone(TimeZone.getTimeZone("Europe/Istanbul"))

def timestampFields = [
    "preMarket_timestamp",
    "regularMarket_timestamp",
    "earningsTimestamp",
    "earningsTimestampStart",
    "earningsTimestampEnd"
]

timestampFields.each { name ->
    // The input values are epoch seconds, so multiply by 1000 for java.util.Date (milliseconds)
    record.setValue(name + "_formatted", formatter.format(new Date(record.getAsLong(name) * 1000L)))
}

return record
Output JSON:
{
"preMarket_timestamp" : 1646830062,
"regularMarket_timestamp" : 1646773204,
"earningsTimestamp" : 1643301000,
"earningsTimestampStart" : 1651003200,
"earningsTimestampEnd" : 1651521600,
"preMarket_timestamp_formatted" : "2022/03/09 15:47:42",
"regularMarket_timestamp_formatted" : "2022/03/09 00:00:04",
"earningsTimestamp_formatted" : "2022/01/27 19:30:00",
"earningsTimestampStart_formatted" : "2022/04/26 23:00:00",
"earningsTimestampEnd_formatted" : "2022/05/02 23:00:00"
}
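For a quick sanity check outside NiFi, the same conversion can be reproduced in plain Python (a minimal sketch, assuming Python 3.9+ for zoneinfo):
from datetime import datetime
from zoneinfo import ZoneInfo

# Epoch seconds -> formatted string in Europe/Istanbul, matching the Groovy script above
ts = 1646830062
dt = datetime.fromtimestamp(ts, tz=ZoneInfo("Europe/Istanbul"))
print(dt.strftime("%Y/%m/%d %H:%M:%S"))  # 2022/03/09 15:47:42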
I have a use case to read data from report.html (for example, test case name and elapsed time), store it into MySQL, and then build a Grafana dashboard on test case name and elapsed time.
How can I achieve this? How can I read data from report.html?
Read output.xml instead.
You can parse it in Python with the "xml.etree.ElementTree" library.
I already parse the output.xml from Robot Framework for personal usage; here is an example of the beginning of that parsing:
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET


class OutputAnalyser:
    def __init__(self):
        self.xml_report_file = '/Logs/output.xml'
        self.root_full_report = self.load_output_xml_results_file()
        self.all_test_by_suite = self.get_all_tests_by_suite()

    def load_output_xml_results_file(self):
        try:
            root_full_report = ET.parse(self.xml_report_file).getroot()
        except FileNotFoundError as e:
            raise FileNotFoundError({'errorCode': 'FileNotFoundError', 'errorMessage': 'File ' + str(self.xml_report_file) + ' does not exist. Error: ' + str(e)})
        return root_full_report

    def get_all_tests_by_suite(self):
        all_suite = [item for elem in self.root_full_report.findall('.//suite') for item in elem]
        all_test_by_suite = []
        for suite in all_suite:
            sublist_test = {}
            sublist_test["suiteName"] = suite.get('name')
            sublist_test["tests"] = suite.findall('./test')
            all_test_by_suite.append(sublist_test)
        return all_test_by_suite
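A minimal usage sketch (assuming an output.xml exists at the path configured above):
analyser = OutputAnalyser()
for suite in analyser.all_test_by_suite:
    test_names = [test.get('name') for test in suite["tests"]]
    print(suite["suiteName"], test_names)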
I am facing an odd issue with xcom_pull, where it always returns the literal xcom_pull string back:
"{{ task_instance.xcom_pull(dag_id = 'cf_test',task_ids='get_config_val',key='http_con_id') }}"
My requirement is simple: I have pushed an XCom using a PythonOperator, and with xcom_pull I am trying to retrieve the value and pass it as the http_conn_id for SimpleHttpOperator, but the variable comes through as the template string instead of the resolved xcom_pull value.
The PythonOperator is successfully able to push the XCom.
Code:
from datetime import datetime

import simplejson as json
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.providers.http.operators.http import SimpleHttpOperator
from google.auth.transport.requests import Request

default_airflow_args = {
    "owner": "divyaansh",
    "depends_on_past": False,
    "start_date": datetime(2022, 5, 18),
    "retries": 0,
    "schedule_interval": "@hourly",
}

project_configs = {
    "project_id": "test",
    "conn_id": "google_cloud_storage_default",
    "bucket_name": "test-transfer",
    "folder_name": "processed-test-rdf",
}


def get_config_vals(**kwargs) -> dict:
    """
    Get config vals from airflow variable and store it as xcoms
    """
    task_instance = kwargs["task_instance"]
    task_instance.xcom_push(key="http_con_id", value="gcp_cloud_function")


def generate_api_token(cf_name: str):
    """
    generate token for api request
    """
    import google.oauth2.id_token

    request = Request()
    target_audience = f"https://us-central1-test-a2h.cloudfunctions.net/{cf_name}"
    return google.oauth2.id_token.fetch_id_token(
        request=request, audience=target_audience
    )


with DAG(
    dag_id="cf_test",
    default_args=default_airflow_args,
    catchup=False,
    render_template_as_native_obj=True,
) as dag:
    start = DummyOperator(task_id="start")

    config_vals = PythonOperator(
        task_id="get_config_val", python_callable=get_config_vals, provide_context=True
    )

    ip_data = json.dumps(
        {
            "bucket_name": project_configs["bucket_name"],
            "file_name": "dummy",
            "target_location": "/valid",
        }
    )

    conn_id = "{{ task_instance.xcom_pull(dag_id = 'cf_test',task_ids='get_config_val',key='http_con_id') }}"
    api_token = generate_api_token("new-cp")

    cf_task = SimpleHttpOperator(
        task_id="file_decrypt_and_validate_cf",
        http_conn_id=conn_id,
        method="POST",
        endpoint="new-cp",
        data=json.dumps(
            json.dumps(
                {
                    "bucket_name": "test-transfer",
                    "file_name": [
                        "processed-test-rdf/dummy_20220501.txt",
                        "processed-test-rdf/dummy_20220502.txt",
                    ],
                    "target_location": "/valid",
                }
            )
        ),
        headers={
            "Authorization": f"bearer {api_token}",
            "Content-Type": "application/json",
        },
        do_xcom_push=True,
        log_response=True,
    )
    print("task new-cp", cf_task)

    check_flow = DummyOperator(task_id="check_flow")
    end = DummyOperator(task_id="end")

    start >> config_vals >> cf_task >> check_flow >> end
Error Message:
raise AirflowNotFoundException(f"The conn_id `{conn_id}` isn't defined") airflow.exceptions.AirflowNotFoundException: The conn_id `"{{ task_instance.xcom_pull(dag_id = 'cf_test',task_ids='get_config_val',key='http_con_id') }}"` isn't defined
I have tried several different ways, but nothing seems to be working.
Can someone point me in the right direction here?
Airflow-version : 2.2.3
Composer-version : 2.0.11
In SimpleHttpOperator the http_conn_id parameter is not a templated field, so you cannot use the Jinja engine with it; the parameter will not be rendered. When you pass "{{ task_instance.xcom_pull(dag_id = 'cf_test',task_ids='get_config_val',key='http_con_id') }}" to the operator, you expect it to be replaced at runtime with the value stored in XCom by the previous task, but Airflow treats it as a plain string. That is exactly what the exception tells you: Airflow tries to find a connection whose id is your very long string, cannot find one, and reports that the connection is not defined.
To solve it you can create a custom operator:
class MySimpleHttpOperator(SimpleHttpOperator):
    template_fields = SimpleHttpOperator.template_fields + ("http_conn_id",)
Then replace SimpleHttpOperator with MySimpleHttpOperator in your DAG.
This change makes the string you set in http_conn_id pass through the Jinja engine, so in your case it will be replaced with the XCom value as you expect.
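A minimal sketch of how the task definition would look after the swap (a trimmed-down version of your existing task; the omitted arguments stay as they are):
cf_task = MySimpleHttpOperator(
    task_id="file_decrypt_and_validate_cf",
    # Rendered by Jinja now that http_conn_id is listed in template_fields
    http_conn_id="{{ task_instance.xcom_pull(dag_id='cf_test', task_ids='get_config_val', key='http_con_id') }}",
    method="POST",
    endpoint="new-cp",
    do_xcom_push=True,
    log_response=True,
)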
I have this operator; it's pretty much the same as S3CopyObjectOperator, except it looks for all objects in a folder and copies them to a destination folder.
import logging
import os

from airflow.models import BaseOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.utils.decorators import apply_defaults
from common.s3.partition import Partition, PartitionType


class S3CopyObjectsOperator(BaseOperator):
    @apply_defaults
    def __init__(self,
                 aws_conn_id: str,
                 partition: Partition,
                 s3_bucket: str,
                 dest_prefix: str,
                 *args,
                 **kwargs):
        super(S3CopyObjectsOperator, self).__init__(*args, **kwargs)
        self.aws_conn_id = aws_conn_id
        self.partition = partition
        self.s3_bucket = s3_bucket
        self.dest_prefix = dest_prefix

    def execute(self, context):
        self.partition.partition_value = context.get("execution_date")
        logging.info(f'self.dest_prefix: {self.dest_prefix}')
        exec_date = context.get("execution_date")
        logging.info(f'self.partition.partition_value: {self.partition.partition_value}')

        s3 = S3Hook(self.aws_conn_id)
        s3_conn = s3.get_conn()

        logging.info(f'source bucket -- self.partition.bucket: {self.partition.bucket}')
        logging.info(f'source key -- self.partition.key_prefix: {self.partition.key_prefix}')

        source_keys = s3.list_keys(bucket_name=self.partition.bucket, prefix=self.partition.key_prefix, delimiter="/")
        logging.info(f'keys: {source_keys}')

        for file in source_keys:
            prefix, filename = os.path.split(file)
            dest_key = f'{self.dest_prefix}/{filename}'
            logging.info(f'Copying file {filename} to {self.dest_prefix}')
            key = self.partition.key_prefix + filename
            logging.info(f'key: {key}')

            s3_conn.copy_object(Bucket=self.s3_bucket,
                                Key=f'{dest_key}',
                                CopySource={
                                    'Bucket': self.partition.bucket,
                                    'Key': key
                                }, ContentEncoding='csv')
However, when I use this operator in my task, I need my dest_prefix to include the execution date.
Things I've tried:
I've tried adding ds = '{{ ds_nodash }}' in the DAG file, but when I print self.dest_prefix in the operator it returns the string value and not the execution date.
I've also tried creating a function, but then printing self.dest_prefix in the operator returns: self.dest_prefix: <function exec_value at 0x7fd008fcb940>. See below for my task;
the execution date should go after snapshot_date=.
for data_group in data_group_names:
    copy_felix_to_s3 = S3CopyObjectsOperator(
        task_id=f'copy_felix_{data_group}_data_to_s3',
        aws_conn_id='aws_default',
        s3_bucket='bucket_name',
        partition=felixS3Partition(
            bucket='source_bucket',
            location_base=f'our_bucket/{data_group}',
            partition_type=None
        ),
        dest_prefix=f"felix/{data_group}/snapshot_date= ds",
        dag=dag
    )
    copy_felix_to_s3
You are missing the declaration of the parameter as a templated field.
class S3CopyObjectsOperator(BaseOperator):
    ...
    template_fields = ("dest_prefix",)
    ...
Macros (such as ds_nodash) are available only in templated fields, so if you don't list the parameter in template_fields, the value you pass is treated as a plain string and is not rendered.
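With that declaration in place, the macro can go straight into the prefix in the task definition. A sketch based on the task from the question (the doubled braces keep the Jinja expression literal inside the f-string):
copy_felix_to_s3 = S3CopyObjectsOperator(
    task_id=f'copy_felix_{data_group}_data_to_s3',
    aws_conn_id='aws_default',
    s3_bucket='bucket_name',
    partition=felixS3Partition(
        bucket='source_bucket',
        location_base=f'our_bucket/{data_group}',
        partition_type=None
    ),
    # Airflow renders this to e.g. felix/<data_group>/snapshot_date=20220518
    dest_prefix=f"felix/{data_group}/snapshot_date={{{{ ds_nodash }}}}",
    dag=dag
)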
I would like to get some help here with using the Cisco Genie parser. Is it possible to load the output of a CLI command (e.g. "show version") into the Genie parser?
My customer passes me the output of "show version" for each of their devices. I have no SSH access to their devices for security reasons. I'm able to extract the output from a Python script.
But how do I load the CLI output into the Genie parser? What I usually do is below, but that is only applicable when I have an SSH connection to the device:
output = device.parse("show version")
So how do I load an output string into the parser and tell it which parser to use? I'm puzzled.
You can take the following example; here the CLI output is from the "show interface" command:
from genie.libs.parser.ios.show_interface import ShowInterfaces
parser = ShowInterfaces(device='', context='cli')
parsed_dict = parser.cli(output=str_op)
Here, str_op is the output of the CLI command as a string.
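A slightly fuller sketch of the same pattern, assuming the customer's CLI output has been saved to a text file (the file name here is hypothetical):
from genie.libs.parser.ios.show_interface import ShowInterfaces

# Read the CLI output the customer provided, saved as plain text
with open("show_interfaces_output.txt") as f:
    str_op = f.read()

parser = ShowInterfaces(device='', context='cli')
parsed_dict = parser.cli(output=str_op)
print(parsed_dict)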
If you don't have SSH access, I can recommend the TTP module. After saving the CLI output to a text file, you can write your own template and easily parse the data you want. I have given an example below (show users).
Example Code:
from pprint import pprint
from ttp import ttp
import json
import time
with open("showUsers.txt") as f:
data_to_parse = f.read()
ttp_template = """
<group name="showUsers" method="table">
{{User|re(".?")|re(".*")}} {{Type}} {{Login_Date}} {{Login_Time}} {{Idle_day}} {{Idle_time}} --
{{Session_ID}} {{From}}
</group>
"""
parser = ttp(data=data_to_parse, template=ttp_template)
parser.parse()
# print result in JSON format
results = parser.result(format='json')[0]
print(results)
Example Run:
[
{
"showUsers": [
{
"From": "--",
"Session_ID": "6"
},
{
"Idle_day": "0d",
"Idle_time": "00:00:00",
"Login_Date": "08FEB2022",
"Login_Time": "10:53:29",
"Type": "SSHv2",
"User": "admin"
},
{
"From": "135.244.199.185",
"Session_ID": "132"
},
{
"Idle_day": "0d",
"Idle_time": "00:03:35",
"Login_Date": "09FEB2022",
"Login_Time": "11:32:50",
"Type": "SSHv2",
"User": "admin"
},
{
"From": "10.144.208.82",
"Session_ID": "143"
}
]
}
]
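To work with the parsed result programmatically rather than as a printed string, a small sketch (assuming the structure shown above):
import json

parsed = json.loads(results)  # "results" is the JSON string produced above
for entry in parsed[0]["showUsers"]:
    if "User" in entry:
        print(entry["User"], entry["Login_Date"], entry["Login_Time"])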
I have been getting errors and I do not know how to go about fixing them. It's telling me I have an import error:
from ..items import QuotetutorialItem
ImportError: attempted relative import with no known parent package.
import scrapy
from scrapy.http import FormRequest
from scrapy.utils.response import open_in_browser
from ..items import QuotetutorialItem


class Quotespider(scrapy.Spider):
    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com/login'
    ]

    def parse(self, response):
        token = response.css('form input::attr(value)').extract_first()
        return FormRequest.from_response(response, formdata={
            'csrf_token': token,
            'username': 'abc',
            'password': '123',
        }, callback=self.start_scraping)

    def start_scraping(self, response):
        open_in_browser(response)
        items = QuotetutorialItem()
        all_div_quotes = response.css('div.quote')
        for quotes in all_div_quotes:
            title = quotes.css('span.text::text').extract()
            author = quotes.css('.author::text').extract()
            tag = quotes.css('.tag::text').extract()
            items['title'] = title
            items['author'] = author
            items['tag'] = tag
            yield items